From 9b16be0d193484f12088efa92862113629286610 Mon Sep 17 00:00:00 2001 From: "mjw@wray-m-3.hpl.hp.com" Date: Mon, 22 Nov 2004 16:49:15 +0000 Subject: [PATCH] bitkeeper revision 1.1159.183.3 (41a2188bAFjxwwkR-Q5G7XglkWtKfQ) Add vnet files. --- .rootkeys | 58 ++ BitKeeper/etc/ignore | 7 + tools/vnet/00README | 10 + tools/vnet/Makefile | 42 + tools/vnet/doc/vnet-module.txt | 50 + tools/vnet/doc/vnet-xend.txt | 140 +++ tools/vnet/examples/Makefile | 12 + tools/vnet/examples/network-vnet | 218 +++++ tools/vnet/examples/vnet97.sxp | 3 + tools/vnet/examples/vnet98.sxp | 3 + tools/vnet/examples/vnet99.sxp | 3 + tools/vnet/vnet-module/00README | 41 + tools/vnet/vnet-module/Makefile | 67 ++ tools/vnet/vnet-module/Makefile-2.4 | 97 ++ tools/vnet/vnet-module/Makefile-2.6 | 51 + tools/vnet/vnet-module/Makefile.ver | 49 + tools/vnet/vnet-module/Makefile.vnet | 57 ++ tools/vnet/vnet-module/esp.c | 863 +++++++++++++++++ tools/vnet/vnet-module/esp.h | 111 +++ tools/vnet/vnet-module/etherip.c | 411 ++++++++ tools/vnet/vnet-module/etherip.h | 27 + tools/vnet/vnet-module/if_etherip.h | 51 + tools/vnet/vnet-module/if_varp.h | 53 + tools/vnet/vnet-module/linux/pfkeyv2.h | 329 +++++++ tools/vnet/vnet-module/random.c | 101 ++ tools/vnet/vnet-module/random.h | 30 + tools/vnet/vnet-module/sa.c | 670 +++++++++++++ tools/vnet/vnet-module/sa.h | 199 ++++ tools/vnet/vnet-module/sa_algorithm.c | 367 +++++++ tools/vnet/vnet-module/sa_algorithm.h | 63 ++ tools/vnet/vnet-module/skb_context.c | 92 ++ tools/vnet/vnet-module/skb_context.h | 76 ++ tools/vnet/vnet-module/skb_util.c | 515 ++++++++++ tools/vnet/vnet-module/skb_util.h | 43 + tools/vnet/vnet-module/tunnel.c | 228 +++++ tools/vnet/vnet-module/tunnel.h | 101 ++ tools/vnet/vnet-module/varp.c | 1236 +++++++++++++++++++++++ tools/vnet/vnet-module/varp.h | 144 +++ tools/vnet/vnet-module/varp_socket.c | 639 ++++++++++++ tools/vnet/vnet-module/vif.c | 267 +++++ tools/vnet/vnet-module/vif.h | 55 ++ tools/vnet/vnet-module/vnet.c | 767 +++++++++++++++ 
tools/vnet/vnet-module/vnet.h | 88 ++ tools/vnet/vnet-module/vnet_dev.c | 534 ++++++++++ tools/vnet/vnet-module/vnet_dev.h | 31 + tools/vnet/vnet-module/vnet_ioctl.c | 815 ++++++++++++++++ tools/vnet/vnet-module/vnet_ioctl.h | 25 + tools/vnet/vnetd/Makefile | 103 ++ tools/vnet/vnetd/connection.c | 167 ++++ tools/vnet/vnetd/connection.h | 51 + tools/vnet/vnetd/marshal.c | 223 +++++ tools/vnet/vnetd/marshal.h | 58 ++ tools/vnet/vnetd/select.c | 67 ++ tools/vnet/vnetd/select.h | 32 + tools/vnet/vnetd/timer.c | 154 +++ tools/vnet/vnetd/timer.h | 39 + tools/vnet/vnetd/vcache.c | 639 ++++++++++++ tools/vnet/vnetd/vcache.h | 141 +++ tools/vnet/vnetd/vnetd.c | 1239 ++++++++++++++++++++++++ tools/vnet/vnetd/vnetd.h | 80 ++ 60 files changed, 12832 insertions(+) create mode 100644 tools/vnet/00README create mode 100644 tools/vnet/Makefile create mode 100644 tools/vnet/doc/vnet-module.txt create mode 100644 tools/vnet/doc/vnet-xend.txt create mode 100644 tools/vnet/examples/Makefile create mode 100755 tools/vnet/examples/network-vnet create mode 100644 tools/vnet/examples/vnet97.sxp create mode 100644 tools/vnet/examples/vnet98.sxp create mode 100644 tools/vnet/examples/vnet99.sxp create mode 100644 tools/vnet/vnet-module/00README create mode 100644 tools/vnet/vnet-module/Makefile create mode 100644 tools/vnet/vnet-module/Makefile-2.4 create mode 100644 tools/vnet/vnet-module/Makefile-2.6 create mode 100644 tools/vnet/vnet-module/Makefile.ver create mode 100644 tools/vnet/vnet-module/Makefile.vnet create mode 100644 tools/vnet/vnet-module/esp.c create mode 100644 tools/vnet/vnet-module/esp.h create mode 100644 tools/vnet/vnet-module/etherip.c create mode 100644 tools/vnet/vnet-module/etherip.h create mode 100644 tools/vnet/vnet-module/if_etherip.h create mode 100644 tools/vnet/vnet-module/if_varp.h create mode 100644 tools/vnet/vnet-module/linux/pfkeyv2.h create mode 100644 tools/vnet/vnet-module/random.c create mode 100644 tools/vnet/vnet-module/random.h create mode 100644 
tools/vnet/vnet-module/sa.c create mode 100644 tools/vnet/vnet-module/sa.h create mode 100644 tools/vnet/vnet-module/sa_algorithm.c create mode 100644 tools/vnet/vnet-module/sa_algorithm.h create mode 100644 tools/vnet/vnet-module/skb_context.c create mode 100644 tools/vnet/vnet-module/skb_context.h create mode 100644 tools/vnet/vnet-module/skb_util.c create mode 100644 tools/vnet/vnet-module/skb_util.h create mode 100644 tools/vnet/vnet-module/tunnel.c create mode 100644 tools/vnet/vnet-module/tunnel.h create mode 100644 tools/vnet/vnet-module/varp.c create mode 100644 tools/vnet/vnet-module/varp.h create mode 100644 tools/vnet/vnet-module/varp_socket.c create mode 100644 tools/vnet/vnet-module/vif.c create mode 100644 tools/vnet/vnet-module/vif.h create mode 100644 tools/vnet/vnet-module/vnet.c create mode 100644 tools/vnet/vnet-module/vnet.h create mode 100644 tools/vnet/vnet-module/vnet_dev.c create mode 100644 tools/vnet/vnet-module/vnet_dev.h create mode 100644 tools/vnet/vnet-module/vnet_ioctl.c create mode 100644 tools/vnet/vnet-module/vnet_ioctl.h create mode 100644 tools/vnet/vnetd/Makefile create mode 100644 tools/vnet/vnetd/connection.c create mode 100644 tools/vnet/vnetd/connection.h create mode 100644 tools/vnet/vnetd/marshal.c create mode 100644 tools/vnet/vnetd/marshal.h create mode 100644 tools/vnet/vnetd/select.c create mode 100644 tools/vnet/vnetd/select.h create mode 100644 tools/vnet/vnetd/timer.c create mode 100644 tools/vnet/vnetd/timer.h create mode 100644 tools/vnet/vnetd/vcache.c create mode 100644 tools/vnet/vnetd/vcache.h create mode 100644 tools/vnet/vnetd/vnetd.c create mode 100644 tools/vnet/vnetd/vnetd.h diff --git a/.rootkeys b/.rootkeys index f12d2ae237..4c1e417632 100644 --- a/.rootkeys +++ b/.rootkeys @@ -543,6 +543,64 @@ 40fcefb3yMSrZvApO9ToIi-iQwnchA tools/sv/images/xen.png 41013a83z27rKvWIxAfUBMVZ1eDCDg tools/sv/inc/script.js 40fcefb3zGC9XNBkSwTEobCoq8YClA tools/sv/inc/style.css +41a21888_WlknVWjSxb32Fo13_ujsw 
tools/vnet/00README +41a21888bOiOJc7blzRbe4MNJoaYTw tools/vnet/Makefile +41a21888mg2k5HeiVjlQYEtJBZT4Qg tools/vnet/doc/vnet-module.txt +41a21888cuxfT8wjCdRR6V1lqf5NtA tools/vnet/doc/vnet-xend.txt +41a21888xEQJAIGktS6XQ4xz2TyA5g tools/vnet/examples/Makefile +41a21888FGQhPR5LJ1GRtOSIIN3QEw tools/vnet/examples/network-vnet +41a21888QPgKrulCfR9SY_pxZKU0KA tools/vnet/examples/vnet97.sxp +41a21888Gm0UBs1i7HqveT7Yz0u8DQ tools/vnet/examples/vnet98.sxp +41a21888r4oGPuGv2Lxl-thgV3H54w tools/vnet/examples/vnet99.sxp +41a21888c9TCRlUwJS9WBjB3e9aWgg tools/vnet/vnet-module/00README +41a21888K2ItolEkksc1MUqyTDI_Kg tools/vnet/vnet-module/Makefile +41a21888mJsFJD7bVMm-nrnWnalGBw tools/vnet/vnet-module/Makefile-2.4 +41a21888Znze3-UCCBZ-Nxpj-bNeHA tools/vnet/vnet-module/Makefile-2.6 +41a21889fwc1judJ7DYvyEviSJ3TPg tools/vnet/vnet-module/Makefile.ver +41a21889m_sYkdODF3j5uhMP-Guy9Q tools/vnet/vnet-module/Makefile.vnet +41a21889bXW2lC28U6KS_s5tOJ_W9Q tools/vnet/vnet-module/esp.c +41a21889L2MfLDsUFQxstt-0frIVmw tools/vnet/vnet-module/esp.h +41a21889V1jOsB2JExI-XQl720WHwg tools/vnet/vnet-module/etherip.c +41a21889IpMYbNufHMDXe2ndNw4JxA tools/vnet/vnet-module/etherip.h +41a21889LT9TNqO2EvTFIUTujrkX9w tools/vnet/vnet-module/if_etherip.h +41a21889PESythGZFG6kmSoOkkN2Nw tools/vnet/vnet-module/if_varp.h +41a21889nCPEomHqOyQ4vnhEm4II4g tools/vnet/vnet-module/linux/pfkeyv2.h +41a21889A_fw4pRmCbBfZdtRunM5Eg tools/vnet/vnet-module/random.c +41a218899Xy2dPKSu3pkuqaqkfKMTA tools/vnet/vnet-module/random.h +41a21889rIH5S1dv8ygdSsTGNlg0JA tools/vnet/vnet-module/sa.c +41a218896Z4vxy6gnV9h0fWRWu0lKQ tools/vnet/vnet-module/sa.h +41a21889qFD8BTbDpB55uVmSVDEsgw tools/vnet/vnet-module/sa_algorithm.c +41a21889r2AwTe-OCSSVMxBzz8uDtw tools/vnet/vnet-module/sa_algorithm.h +41a21889tvjtL7O8tMveVB8MdSKPnQ tools/vnet/vnet-module/skb_context.c +41a21889lD_QOUz2Msd7fB5rJQzfxA tools/vnet/vnet-module/skb_context.h +41a21889F1r1xnJamzdeuClR8MNwQg tools/vnet/vnet-module/skb_util.c +41a21889sS4bjVqEna24sS8NpV7SRA 
tools/vnet/vnet-module/skb_util.h +41a21889MDawEK3J_f_oAGnZznhG2w tools/vnet/vnet-module/tunnel.c +41a218896TlHXpVVqF50uz_u_WMXRw tools/vnet/vnet-module/tunnel.h +41a21889nQYbJbqrOApg_RbkwPtXGg tools/vnet/vnet-module/varp.c +41a21889Pev5MJlqqass6CxN4mmvPw tools/vnet/vnet-module/varp.h +41a21889GbsHHfkpA-PkOvltfEwpMA tools/vnet/vnet-module/varp_socket.c +41a21889sknn8zd5xCJlpQbs7MvxKg tools/vnet/vnet-module/vif.c +41a21889VsKKWpe6rcXOSLPy2FuNWQ tools/vnet/vnet-module/vif.h +41a21889dgkOyuSTVqy7D8TPIzrUyw tools/vnet/vnet-module/vnet.c +41a21889ocAdwk7V1nNt4iBpmYW-Mw tools/vnet/vnet-module/vnet.h +41a21889YrTiC0ArJSGFtiaHz2j1qQ tools/vnet/vnet-module/vnet_dev.c +41a21889rHT4vrC4VAfk7-xP_K5aBg tools/vnet/vnet-module/vnet_dev.h +41a21889qJj6GjT2f5hMHRvPS1AW4w tools/vnet/vnet-module/vnet_ioctl.c +41a2188a8W4xYB0LYm512agtoEv52g tools/vnet/vnet-module/vnet_ioctl.h +41a2188aFF_1T9OgpqUjjjaCqKB8lw tools/vnet/vnetd/Makefile +41a2188a9j84qS4CxqMLVCvyGpA93w tools/vnet/vnetd/connection.c +41a2188atexNEami9TNVYNkRSb7Bqg tools/vnet/vnetd/connection.h +41a2188abgYpITSrWoMGHHrM56nklw tools/vnet/vnetd/marshal.c +41a2188aUbOi5tAYwOS4aPixo1EGwQ tools/vnet/vnetd/marshal.h +41a2188aDJlSVB1s_st2MSWxW8kMwg tools/vnet/vnetd/select.c +41a2188aE9LUDdSSwNT3BWVWCvGSnQ tools/vnet/vnetd/select.h +41a2188aTbMKv_Eig12dSrBUEBl1Jg tools/vnet/vnetd/timer.c +41a2188aIzBGqQ6DUVzCxfBsN0Q6Ww tools/vnet/vnetd/timer.h +41a2188aIf3Xk6uvk7KzjdpOsflAEw tools/vnet/vnetd/vcache.c +41a2188ar6_vOO3_tEJQjmFVU3409A tools/vnet/vnetd/vcache.h +41a2188aETrGU60X9WtGhYVfU7z0Pw tools/vnet/vnetd/vnetd.c +41a2188ahYjemudGyB7078AWMFR-0w tools/vnet/vnetd/vnetd.h 4194e861IgTabTt8HOuh143QIJFD1Q tools/x2d2/Makefile 4194e861M2gcBz4i94cQYpqzi8n6UA tools/x2d2/cntrl_con.c 4194e8612TrrMvC8ZlA4h2ZYCPWz4g tools/x2d2/minixend.c diff --git a/BitKeeper/etc/ignore b/BitKeeper/etc/ignore index 6d8240a3cf..ec7c4e8ee6 100644 --- a/BitKeeper/etc/ignore +++ b/BitKeeper/etc/ignore @@ -59,6 +59,13 @@ tools/check/.* tools/libxc/xen/* 
tools/misc/miniterm/miniterm tools/misc/xen_cpuperf +tools/vnet/gc +tools/vnet/gc*/* +tools/vnet/vnet-module/.tmp_versions/* +tools/vnet/vnet-module/.*.cmd +tools/vnet/vnet-module/*.ko +tools/vnet/vnet-module/vnet_module.mod.* +tools/vnet/vnetd/vnetd tools/web-shutdown.tap tools/xentrace/xentrace tools/xfrd/xfrd diff --git a/tools/vnet/00README b/tools/vnet/00README new file mode 100644 index 0000000000..a239e26b12 --- /dev/null +++ b/tools/vnet/00README @@ -0,0 +1,10 @@ +This directory contains the implementation of vnets: +virtual private networks for virtual machines. +See doc/ for more information and examples/ for example +configurations. + +The kernel module is in vnet-module/ and the vnet forwarding +daemon is in vnetd/. The vnetd daemon makes vnets work across +subnets when multicast routing is not available. + +Mike Wray \ No newline at end of file diff --git a/tools/vnet/Makefile b/tools/vnet/Makefile new file mode 100644 index 0000000000..346adb99d3 --- /dev/null +++ b/tools/vnet/Makefile @@ -0,0 +1,42 @@ + +export LINUX_RELEASE ?=2.6 + +all: compile + +compile: vnetd vnet-module + +gc.tar.gz: + wget http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/$@ + +gc: gc.tar.gz + tar xfz gc.tar.gz + ln -sf gc?.? gc + +gc-install: gc + (cd gc && ./configure --prefix=`pwd`/install && make && make install) + +gc-clean: + -$(MAKE) -C gc clean + +gc-pristine: + -rm -rf gc?.? 
gc + +.PHONY: vnetd vnet-module install dist clean + +vnetd: gc-install + $(MAKE) -C vnetd + +vnet-module: + $(MAKE) -C vnet-module + +install: compile + $(MAKE) -C vnetd install + $(MAKE) -C vnet-module install + $(MAKE) -C examples install + +dist: $(TARGET) + $(MAKE) prefix=`pwd`/../../install dist=yes install + +clean: + -$(MAKE) -C vnetd clean + -$(MAKE) -C vnet-module clean diff --git a/tools/vnet/doc/vnet-module.txt b/tools/vnet/doc/vnet-module.txt new file mode 100644 index 0000000000..b9c8ac57c3 --- /dev/null +++ b/tools/vnet/doc/vnet-module.txt @@ -0,0 +1,50 @@ +Vnet Module Command Interface +Mike Wray +2004/09/17 + +When loaded with insmod, the vnet module creates /proc/vnet/policy, which +can be used to control the module by writing commands into it. +The return code from the command should be returned by close. + +The commands are: + +(vnet.add (id <id>) [(security { none | auth | conf } )] ) + +Create the vnet with id <id> and the given security level (default none). +Security levels: +- none: no security +- auth: message authentication (IPSEC hmac) +- conf: message confidentiality (IPSEC hmac and encryption) + +(vnet.del (id <id>)) + +Delete the vnet with id <id>. + +(vif.add (vnet <vnetid>) (vmac <macaddr>)) + +Add the vif with MAC address <macaddr> to the vnet with id <vnetid>. +This makes the vnet module respond to VARP requests for <macaddr> +on vnet <vnetid>. + +(vif.del (vnet <vnetid>) (vmac <macaddr>)) + +Remove the vif with MAC address <macaddr> from the vnet with id <vnetid>. +The vnet module will stop responding to VARP for the vif. 
+ +Examples: + +To create vnet 10 with no security: + +echo '(vnet.add (id 10))' > /proc/vnet/policy + +To create vnet 11 with message authentication: + +echo '(vnet.add (id 11) (security auth))' > /proc/vnet/policy + +To add the vif with vmac "aa:00:00:bc:34:ae" to vnet 10: + +echo '(vif.add (vnet 10) (vmac aa:00:00:bc:34:ae))' > /proc/vnet/policy + +To remove the vif from the vnet: + +echo '(vif.del (vnet 10) (vmac aa:00:00:bc:34:ae))' > /proc/vnet/policy diff --git a/tools/vnet/doc/vnet-xend.txt b/tools/vnet/doc/vnet-xend.txt new file mode 100644 index 0000000000..9ad1c523d4 --- /dev/null +++ b/tools/vnet/doc/vnet-xend.txt @@ -0,0 +1,140 @@ + +Vnets: Virtual Networks for Virtual Machines + +Mike Wray + +0) Introduction +--------------- + +Vnets provide virtual private LANs for virtual machines. +This is done using bridging and tunneling. A virtual interface +on a vnet can only see other interfaces on the same vnet - it cannot +see the real network, and the real network cannot see it either. + +Virtual interfaces on the same vnet can be on the same machine +or on different machines; either way they can still talk. The hosting machines +can even be on different subnets if you run vnetd to forward, +or have multicast routing enabled. + + +1) Installing vnet support +-------------------------- + +Assuming the code has been installed (make install in the parent directory), +configure xend to use 'network-vnet' instead of the default 'network' to +start up networking. This just loads the vnet module when networking starts. + +In /etc/xen/xend-config.sxp: + +Configure the network script: + +(network-script network-vnet) + +Restart xend. + +2) Creating vnets +----------------- + +Xend already implements commands to add/remove vnets and +bridge to them. 
To add a vnet use + +xm call vnet_add <vnet config file> + +For example, if vnet97.sxp contains: + +(vnet (id 97) (bridge vnet97) (vnetif vnetif97) (security none)) + +do + +xm call vnet_add vnet97.sxp + +This will define a vnet with id 97 and no security. The bridge for the +vnet is called vnet97 and the virtual interface for it is vnetif97. +To add an interface on a vm to this vnet simply set its bridge to vnet97 +in its configuration. + +In Python: + +vif="bridge=vnet97" + +In sxp: + +(device (vif (mac aa:00:00:01:02:03) (bridge vnet97))) + +Once configured, vnets are persistent in the xend database. +To remove a vnet use + +xm call vnet_delete <vnet id> + +To list vnets use + +xm call vnets + +To get information on a vnet id use + +xm call vnet <vnet id> + +3) Troubleshooting +------------------ + +The vnet module should appear in 'lsmod'. +If a vnet has been configured it should appear in the output of 'xm call vnets'. +Its bridge and interface should appear in 'ifconfig'. +It should also show in 'brctl show', with its attached interfaces. + +You can 'see into' a vnet from dom0 if you put an IP address on the bridge. +For example, if you have vnet97 with a vm with ip addr 10.0.0.12 on it, +then + +ifconfig vnet97 10.0.0.20 up + +should let you ping 10.0.0.12 via the vnet97 bridge. 
+ +4) Examples +----------- + +Here's the full config for a vm on vnet 97, using ip addr 10.0.0.12: + +(vm + (name dom12) + (memory '64') + (cpu '1') + (console '8502') + (image + (linux + (kernel /boot/vmlinuz-2.6.9-xenU) + (ip 10.0.0.12:1.2.3.4::::eth0:off) + (root /dev/hda1) + (args 'rw fastboot 4') + ) + ) + (device (vbd (uname phy:hda2) (dev hda1) (mode w))) + (device (vif (mac aa:00:00:11:00:12) (bridge vnet97))) +) + +If you run another vm on the same vnet: + +(vm + (name dom11) + (memory '64') + (cpu '1') + (console '8501') + (image + (linux + (kernel /boot/vmlinuz-2.6.9-xenU) + (ip 10.0.0.11:1.2.3.4::::eth0:off) + (root /dev/hda1) + (args 'rw fastboot 4') + ) + ) + (device (vbd (uname phy:hda3) (dev hda1) (mode w))) + (device (vif (mac aa:00:00:11:00:11) (bridge vnet97))) +) + +the vms should be able to talk over the vnet. Check with ping. +If they are both on the same machine the connection will simply +be the vnet97 bridge, if they are on separate machines their +packets will be tunneled in etherip. They should be able to +see each other, but not the real network. + + diff --git a/tools/vnet/examples/Makefile b/tools/vnet/examples/Makefile new file mode 100644 index 0000000000..fe9d9f56ad --- /dev/null +++ b/tools/vnet/examples/Makefile @@ -0,0 +1,12 @@ +# -*- mode: Makefile; -*- +#============================================================================ + +XEN_SCRIPT_DIR:=/etc/xen/scripts + +all: + +install: + install -m 0755 -d $(prefix)$(XEN_SCRIPT_DIR) + install -m 0554 network-vnet $(prefix)$(XEN_SCRIPT_DIR) + +clean: \ No newline at end of file diff --git a/tools/vnet/examples/network-vnet b/tools/vnet/examples/network-vnet new file mode 100755 index 0000000000..4b388bd3f9 --- /dev/null +++ b/tools/vnet/examples/network-vnet @@ -0,0 +1,218 @@ +#!/bin/sh +#============================================================================ +# Default Xen network start/stop script. +# Xend calls a network script when it starts. 
+# The script name to use is defined in /etc/xen/xend-config.sxp +# in the network-script field. +# +# This script creates a bridge (default xen-br0), adds a device +# (default eth0) to it, copies the IP addresses from the device +# to the bridge and adjusts the routes accordingly. +# +# If all goes well, this should ensure that networking stays up. +# However, some configurations are upset by this, especially +# NFS roots. If the bridged setup does not meet your needs, +# configure a different script, for example using routing instead. +# +# Usage: +# +# network (start|stop|status) {VAR=VAL}* +# +# Vars: +# +# bridge The bridge to use (default xen-br0). +# netdev The interface to add to the bridge (default eth0). +# antispoof Whether to use iptables to prevent spoofing (default yes). +# +# start: +# Creates the bridge and enslaves netdev to it. +# Copies the IP addresses from netdev to the bridge. +# Deletes the routes to netdev and adds them on bridge. +# +# stop: +# Removes netdev from the bridge. +# Deletes the routes to bridge and adds them to netdev. +# +# status: +# Print ifconfig for netdev and bridge. +# Print routes. +# +#============================================================================ + +# Exit if anything goes wrong. +set -e + +# First arg is the operation. +OP=$1 +shift + +# Pull variables in args in to environment. +for arg ; do export "${arg}" ; done + +bridge=${bridge:-xen-br0} +netdev=${netdev:-eth0} +antispoof=${antispoof:-yes} + +echo "network $OP bridge=$bridge netdev=$netdev antispoof=$antispoof" + +# Usage: transfer_addrs src dst +# Copy all IP addresses (including aliases) from device $src to device $dst. +transfer_addrs () { + local src=$1 + local dst=$2 + # Don't bother if $dst already has IP addresses. + if ip addr show dev ${dst} | egrep -q '^ *inet' ; then + return + fi + # Address lines start with 'inet' and have the device in them. + # Replace 'inet' with 'ip addr add' and change the device name $src + # to 'dev $src'. 
Remove netmask as we'll add routes later. + ip addr show dev ${src} | egrep '^ *inet' | sed -e " +s/inet/ip addr add/ +s@\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+\)/[0-9]\+@\1@ +s/${src}/dev ${dst}/ +" | sh -e +} + +# Usage: transfer_routes src dst +# Get all IP routes to device $src, delete them, and +# add the same routes to device $dst. +# The original routes have to be deleted, otherwise adding them +# for $dst fails (duplicate routes). +transfer_routes () { + local src=$1 + local dst=$2 + # List all routes and grep the ones with $src in. + # Stick 'ip route del' on the front to delete. + # Change $src to $dst and use 'ip route add' to add. + ip route list | grep ${src} | sed -e " +h +s/^/ip route del / +P +g +s/${src}/${dst}/ +s/^/ip route add / +P +d +" | sh -e +} + +# Usage: create_bridge dev bridge +# Create bridge $bridge and add device $dev to it. +create_bridge () { + local dev=$1 + local bridge=$2 + + # Don't create the bridge if it already exists. + if ! brctl show | grep -q ${bridge} ; then + brctl addbr ${bridge} + brctl stp ${bridge} off + brctl setfd ${bridge} 0 + fi + ifconfig ${bridge} up +} + +# Usage: antispoofing dev bridge +# Set the default forwarding policy for $dev to drop. +# Allow forwarding to the bridge. +antispoofing () { + local dev=$1 + local bridge=$2 + + iptables -P FORWARD DROP + iptables -A FORWARD -m physdev --physdev-in ${dev} -j ACCEPT +} + +# Usage: show_status dev bridge +# Print ifconfig and routes. +show_status () { + local dev=$1 + local bridge=$2 + + echo '============================================================' + ifconfig ${dev} + ifconfig ${bridge} + echo ' ' + ip route list + echo ' ' + route -n + echo '============================================================' +} + +# Insert the vnet module if it can be found and +# it's not already there. 
+vnet_insert () { + local module="vnet_module" + local mod_dir=/lib/modules/$(uname -r)/kernel + local mod_path="${mod_dir}/${module}" + local mod_obj="" + + for ext in ".o" ".ko" ; do + f=${mod_path}${ext} + if [ -f ${f} ] ; then + mod_obj=$f + break + fi + done + if [ "${mod_obj}" == "" ] ; then + return + fi + if lsmod | grep -q ${module} ; then + echo "VNET: ${module} loaded" + else + echo "VNET: Loading ${module}..." + insmod ${mod_obj} + fi +} + +op_start () { + if [ "${bridge}" == "null" ] ; then + return + fi + # Create the bridge and give it the interface IP addresses. + # Move the interface routes onto the bridge. + create_bridge ${netdev} ${bridge} + transfer_addrs ${netdev} ${bridge} + transfer_routes ${netdev} ${bridge} + # Don't add $dev to $bridge if it's already on a bridge. + if ! brctl show | grep -q ${netdev} ; then + brctl addif ${bridge} ${netdev} + fi + + if [ ${antispoof} == 'yes' ] ; then + antispoofing ${netdev} ${bridge} + fi + + vnet_insert +} + +op_stop () { + if [ "${bridge}" == "null" ] ; then + return + fi + # Remove the interface from the bridge. + # Move the routes back to the interface. + brctl delif ${bridge} ${netdev} + transfer_routes ${bridge} ${netdev} + + # It's not our place to be enabling forwarding... +} + +case ${OP} in + start) + op_start + ;; + + stop) + op_stop + ;; + + status) + show_status ${netdev} ${bridge} + ;; + + *) + echo 'Unknown command: ' ${OP} + echo 'Valid commands are: start, stop, status' + exit 1 +esac diff --git a/tools/vnet/examples/vnet97.sxp b/tools/vnet/examples/vnet97.sxp new file mode 100644 index 0000000000..ef0784369b --- /dev/null +++ b/tools/vnet/examples/vnet97.sxp @@ -0,0 +1,3 @@ +# Vnet configuration for a vnet with id 97 and no security. +# Configure using 'xm call vnet_add vnet97.sxp'. 
+(vnet (id 97) (bridge vnet97) (vnetif vnetif97) (security none)) diff --git a/tools/vnet/examples/vnet98.sxp b/tools/vnet/examples/vnet98.sxp new file mode 100644 index 0000000000..807d56daaf --- /dev/null +++ b/tools/vnet/examples/vnet98.sxp @@ -0,0 +1,3 @@ +# Vnet configuration for a vnet with id 98 and message authentication. +# Configure using 'xm call vnet_add vnet98.sxp'. +(vnet (id 98) (bridge vnet98) (vnetif vnetif98) (security auth)) diff --git a/tools/vnet/examples/vnet99.sxp b/tools/vnet/examples/vnet99.sxp new file mode 100644 index 0000000000..ffce1d7fbf --- /dev/null +++ b/tools/vnet/examples/vnet99.sxp @@ -0,0 +1,3 @@ +# Vnet configuration for a vnet with id 99 and message confidentiality. +# Configure using 'xm call vnet_add vnet99.sxp'. +(vnet (id 99) (bridge vnet99) (vnetif vnetif99) (security conf)) diff --git a/tools/vnet/vnet-module/00README b/tools/vnet/vnet-module/00README new file mode 100644 index 0000000000..8dfaf00bbd --- /dev/null +++ b/tools/vnet/vnet-module/00README @@ -0,0 +1,41 @@ +Vnet module for network virtualization. +Mike Wray + +*) Compiling +The vnet module can be compiled for 2.4 or 2.6 series kernels. +The makefiles use the following variables, which +can be set in your env or on the make command line: + +LINUX_RELEASE: linux release to compile for, 2.4 or 2.6 (default). +XEN_ROOT: root of the xen tree containing kernel source. Default '../../..'. +prefix: root path to install in, default is XEN_ROOT/install/. + Set to '/' to install relative to filesystem root. +KERNEL_VERSION: kernel version, default got from XEN_ROOT. +KERNEL_MINOR: kernel minor version, default -xen0. +KERNEL_SRC: path to kernel source, default linux-<KERNEL_VERSION> under XEN_ROOT. + +*) For 2.4 kernel + +To compile from scratch: + +make LINUX_RELEASE=2.4 clean +make LINUX_RELEASE=2.4 + +This will build vnet_module.o in the current directory. 
+To install the module use + +make LINUX_RELEASE=2.4 install + +*) For 2.6 kernel + +To compile from scratch: + +make clean +make LINUX_RELEASE=2.6 + +This will build vnet_module.ko in the current directory. +To install the module use + +make LINUX_RELEASE=2.6 install + + diff --git a/tools/vnet/vnet-module/Makefile b/tools/vnet/vnet-module/Makefile new file mode 100644 index 0000000000..a9bf9afea9 --- /dev/null +++ b/tools/vnet/vnet-module/Makefile @@ -0,0 +1,67 @@ +# -*- mode: Makefile; -*- +#============================================================================ +# +# Copyright (C) 2004 Mike Wray +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free software Foundation, Inc., +# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA +#============================================================================ + +#============================================================================ +ifeq ($(src),) +LINUX_RELEASE ?=2.6 + +include Makefile-$(LINUX_RELEASE) + +#============================================================================ +else +#============================================================================ +# This section is for the 2.6 kbuild. 
+ +#$(warning KBUILD_EXTMOD $(KBUILD_EXTMOD)) +#$(warning src $(src)) +#$(warning obj $(obj)) + +include $(src)/Makefile.vnet + +obj-m = vnet_module.o +vnet_module-objs = $(VNET_OBJ) +vnet_module-objs += $(VNET_LIB_OBJ) + +#---------------------------------------------------------------------------- +# The fancy stuff in the kernel build defeats 'vpath %.c' so we can't +# use that to get the lib files compiled. +# Setup explicit rules for them using the kbuild C compile rule. + +# File names in the lib dir. +remote_srcs = $(foreach file,$(VNET_LIB_SRC),$(LIB_DIR)/$(file)) + +# Equivalent file names here. +local_srcs = $(foreach file,$(VNET_LIB_SRC),$(src)/$(file)) + +# Objects for the local names. +local_objs = $(local_srcs:.c=.o) + +# Make the local objects depend on compiling the remote sources. +$(local_objs): $(src)/%.o: $(LIB_DIR)/%.c + $(call if_changed_rule,cc_o_c) +#---------------------------------------------------------------------------- + +vpath %.h $(LIB_DIR) +EXTRA_CFLAGS += -I $(LIB_DIR) +EXTRA_CFLAGS += -I $(src) + +endif +#============================================================================ + diff --git a/tools/vnet/vnet-module/Makefile-2.4 b/tools/vnet/vnet-module/Makefile-2.4 new file mode 100644 index 0000000000..e0dad1a8c8 --- /dev/null +++ b/tools/vnet/vnet-module/Makefile-2.4 @@ -0,0 +1,97 @@ +# -*- mode: Makefile; -*- +#============================================================================ +# +# Copyright (C) 2004 Mike Wray +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free software Foundation, Inc., +# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA +#============================================================================ + +#============================================================================ +# Vnet module makefile for 2.4 series kernels. + +include Makefile.ver + +KERNEL_MODULE := vnet_module.o + +CONFIG_MODVERSIONS := $(shell grep 'CONFIG_MODVERSIONS=y' $(KERNEL_SRC)/.config && echo 1 || echo 0) + +include Makefile.vnet + +VNET_OBJ += $(VNET_LIB_OBJ) + +#---------------------------------------------------------------------------- + +vpath %.h $(KERNEL_SRC)/include +INCLUDES+= -I $(KERNEL_SRC)/include + +vpath %.h $(LIB_DIR) +vpath %.c $(LIB_DIR) +INCLUDES += -I $(LIB_DIR) + +INCLUDES+= -I . + +#---------------------------------------------------------------------------- + +CPPFLAGS += -D__KERNEL__ +CPPFLAGS += -DMODULE + +ifeq ($(CONFIG_MODVERSIONS), 1) +CPPFLAGS += -DMODVERSIONS +CPPFLAGS += -include $(KERNEL_SRC)/include/linux/modversions.h +endif + +CPPFLAGS += $(INCLUDES) + +CFLAGS += -Wall +CFLAGS += -Wstrict-prototypes +CFLAGS += -Wno-trigraphs +CFLAGS += -Wno-unused-function +CFLAGS += -Wno-unused-parameter + +CFLAGS += -g +CFLAGS += -O2 +CFLAGS += -fno-strict-aliasing +CFLAGS += -fno-common +#CFLAGS += -fomit-frame-pointer + +# Dependencies. Gcc generates them for us. 
+CFLAGS += -Wp,-MD,.$(@F).d +VNET_DEP = .*.d +#---------------------------------------------------------------------------- + +.PHONY: all +all: module + +.PHONY: module modules +module modules: $(KERNEL_MODULE) + +$(KERNEL_MODULE): $(VNET_OBJ) + $(LD) -r -o $@ $^ + +.PHONY: install install-module modules_install +install install-module modules_install: module + install -m 0755 -d $(prefix)$(KERNEL_MODULE_DIR) + install -m 0554 $(KERNEL_MODULE) $(prefix)$(KERNEL_MODULE_DIR) + +TAGS: + etags *.c *.h + +.PHONY: clean +clean: + @rm -f *.a *.o *.ko *~ + @rm -f $(VNET_DEP) .*.cmd *.mod.? + @rm -rf .tmp_versions + +-include $(VNET_DEP) diff --git a/tools/vnet/vnet-module/Makefile-2.6 b/tools/vnet/vnet-module/Makefile-2.6 new file mode 100644 index 0000000000..a46db54da4 --- /dev/null +++ b/tools/vnet/vnet-module/Makefile-2.6 @@ -0,0 +1,51 @@ +# -*- mode: Makefile; -*- +#============================================================================ +# +# Copyright (C) 2004 Mike Wray +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free software Foundation, Inc., +# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA +#============================================================================ + +#============================================================================ +# Vnet module makefile for 2.6 series kernels. 
+
+LINUX_RELEASE ?= 2.6
+include Makefile.ver
+
+KERNEL_MODULE = vnet_module.ko
+
+#----------------------------------------------------------------------------
+#export KBUILD_VERBOSE=1
+
+.PHONY: all
+all: module
+
+# Build through the kernel's own build system, pointing it back at this
+# directory with M=. $(CURDIR) is set by make itself to the directory it is
+# running in, so build and clean agree and neither depends on the PWD
+# environment variable of the invoking shell.
+# Both target names are phony so a stray file called 'modules' cannot
+# suppress the build.
+.PHONY: module modules
+module modules:
+	$(MAKE) -C $(KERNEL_SRC) M=$(CURDIR) modules
+
+.PHONY: install install-module modules_install
+install install-module modules_install: module
+	install -m 0755 -d $(prefix)$(KERNEL_MODULE_DIR)
+	install -m 0554 $(KERNEL_MODULE) $(prefix)$(KERNEL_MODULE_DIR)
+
+.PHONY: clean
+clean:
+	@$(MAKE) -C $(KERNEL_SRC) M=$(CURDIR) clean
+	@rm -f *.a *.o *.ko *~ .*.d .*.cmd *.mod.?
+
+TAGS:
+	etags *.c *.h
+
diff --git a/tools/vnet/vnet-module/Makefile.ver b/tools/vnet/vnet-module/Makefile.ver
new file mode 100644
index 0000000000..ddd3541ee3
--- /dev/null
+++ b/tools/vnet/vnet-module/Makefile.ver
@@ -0,0 +1,49 @@
+# -*- mode: Makefile; -*-
+#============================================================================
+#
+# Copyright (C) 2004 Mike Wray
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free software Foundation, Inc.,
+# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+
+#----------------------------------------------------------------------------
+# Xeno/xen.
+
+# Root of xen tree.
+XEN_ROOT ?=../../..
+
+# Path to relativize the install.
Set to / +# to install relative to filesystem root. +prefix ?=$(XEN_ROOT)/install/ +#---------------------------------------------------------------------------- + +LINUX_RELEASE ?=2.6 +KERNEL_MINOR ?=-xen0 + +LINUX_VERSION ?= $(shell ( /bin/ls -ld $(XEN_ROOT)/linux-$(LINUX_RELEASE).*-xen-sparse ) 2>/dev/null | \ + sed -e 's!^.*linux-\(.\+\)-xen-sparse!\1!' ) + +ifeq ($(LINUX_VERSION),) +$(error Kernel source for linux $(LINUX_RELEASE) not found) +endif + +KERNEL_VERSION =$(LINUX_VERSION)$(KERNEL_MINOR) + +KERNEL_SRC ?= $(XEN_ROOT)/linux-$(KERNEL_VERSION) + +KERNEL_MODULE_DIR = /lib/modules/$(KERNEL_VERSION)/kernel + +#$(warning KERNEL_VERSION $(KERNEL_VERSION)) +#$(warning KERNEL_SRC $(KERNEL_SRC)) diff --git a/tools/vnet/vnet-module/Makefile.vnet b/tools/vnet/vnet-module/Makefile.vnet new file mode 100644 index 0000000000..366c2fc9b9 --- /dev/null +++ b/tools/vnet/vnet-module/Makefile.vnet @@ -0,0 +1,57 @@ +# -*- mode: Makefile; -*- +#============================================================================ +# +# Copyright (C) 2004 Mike Wray +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 2 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free software Foundation, Inc., +# 59 Temple Place, suite 330, Boston, MA 02111-1307 USA +#============================================================================ + +ifeq ($(src),) +SRC_DIR= +else +SRC_DIR=$(src)/ +endif + +LIB_DIR := $(SRC_DIR)../../libxutil + +VNET_SRC := +VNET_SRC += esp.c +VNET_SRC += etherip.c +VNET_SRC += random.c +VNET_SRC += sa_algorithm.c +VNET_SRC += sa.c +VNET_SRC += skb_context.c +VNET_SRC += skb_util.c +VNET_SRC += tunnel.c +VNET_SRC += varp.c +VNET_SRC += varp_socket.c +VNET_SRC += vif.c +VNET_SRC += vnet.c +VNET_SRC += vnet_dev.c +VNET_SRC += vnet_ioctl.c + +VNET_LIB_SRC += allocate.c +VNET_LIB_SRC += enum.c +VNET_LIB_SRC += hash_table.c +VNET_LIB_SRC += iostream.c +VNET_LIB_SRC += kernel_stream.c +VNET_LIB_SRC += sxpr.c +VNET_LIB_SRC += sxpr_parser.c +VNET_LIB_SRC += sys_net.c +VNET_LIB_SRC += sys_string.c + +VNET_OBJ := $(VNET_SRC:.c=.o) +VNET_LIB_OBJ := $(VNET_LIB_SRC:.c=.o) + diff --git a/tools/vnet/vnet-module/esp.c b/tools/vnet/vnet-module/esp.c new file mode 100644 index 0000000000..7e27006835 --- /dev/null +++ b/tools/vnet/vnet-module/esp.c @@ -0,0 +1,863 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static const int DEBUG_ICV = 0; + +#define MODULE_NAME "IPSEC" +#define DEBUG 1 +#undef DEBUG +#include "debug.h" + +/* Outgoing packet: [ eth | ip | data ] + * After etherip: [ eth2 | ip2 | ethip | eth | ip | data ] + * After esp : [ eth2 | ip2 | esp | {ethip | eth | ip | data} | pad | icv ] + * ^ + + * The curly braces { ... } denote encryption. + * The esp header includes the fixed esp headers and the iv (variable size). + * The point marked ^ does not move. To the left is in the header, to the right + * is in the frag. Remember that all outgoing skbs (from domains) have 1 frag. + * Data after + is added by esp, using an extra frag. + * + * Incoming as above. + * After decrypt: [ eth2 | ip2 | esp | ethip | eth | ip | data | pad | icv ] + * Trim tail: [ eth2 | ip2 | esp | ethip | eth | ip | data ] + * Drop hdr: [ eth2 | ip2 | ethip | eth | ip | data ] + * ^ + * The point marked ^ does not move. Incoming skbs are linear (no frags). + * The tail is trimmed by adjusting skb->tail and len. + * The esp hdr is dropped by using memmove to move the headers and + * adjusting the skb pointers. + * + * todo: Now this code is in linux we can't assume 1 frag for outbound skbs, + * or (maybe) that memmove is safe on inbound. + */ + +/** Round n up to a multiple of block. + * If block is less than 2 does nothing. + * Otherwise assume block is a power of 2. 
+ * + * @param n to round up + * @param block size to round to a multiple of + * @return rounded value + */ +static inline int roundup(int n, int block){ + if(block <= 1) return n; + block--; + return (n + block) & ~block; +} + +/** Check if n is a multiple of block. + * If block is less than 2 returns 1. + * Otherwise assumes block is a power of 2. + * + * @param n to check + * @param block block size + * @return 1 if a multiple, 0 otherwise + */ +static inline int multipleof(int n, int block){ + if(block <= 1) return 1; + block--; + return !(n & block); +} + +/** Convert from bits to bytes. + * + * @param n number of bits + * @return number of bytes + */ +static inline int bits_to_bytes(int n){ + return n / 8; +} + + +/** Insert esp padding at the end of an skb. + * Inserts padding bytes, number of padding bytes, protocol number. + * + * @param skb skb + * @param offset offset from skb end to where padding should end + * @param extra_n total amount of padding + * @param protocol protocol number (from original ip hdr) + * @return 0 on success, error code otherwise + */ +static int esp_sa_pad(struct sk_buff *skb, int offset, int extra_n, + unsigned char protocol){ + int err; + char *data; + int pad_n = extra_n - ESP_PAD_N; + int i; + char buf[extra_n]; + + data = buf; + for(i = 1; i <= pad_n; i++){ + *data++ = i; + } + *data++ = pad_n; + *data++ = protocol; + err = skb_put_bits(skb, skb->len - offset - extra_n, buf, extra_n); + return err; +} + +/** Encrypt skb. Skips esp header and iv. + * Assumes skb->data points at esp header. 
+ * + * @param esp esp state + * @parm esph esp header + * @param skb packet + * @param head_n size of esp header and iv + * @param iv_n size of iv + * @param text_n size of ciphertext + * @return 0 on success, error code otherwise + */ +static int esp_sa_encrypt(ESPState *esp, ESPHdr *esph, struct sk_buff *skb, + int head_n, int iv_n, int text_n){ + int err = 0; + int sg_n = skb_shinfo(skb)->nr_frags + 1; + struct scatterlist sg[sg_n]; + + err = skb_scatterlist(skb, sg, &sg_n, head_n, text_n); + if(err) goto exit; + if(iv_n){ + crypto_cipher_set_iv(esp->cipher.tfm, esp->cipher.iv, iv_n); + } + crypto_cipher_encrypt(esp->cipher.tfm, sg, sg, text_n); + if(iv_n){ + memcpy(esph->data, esp->cipher.iv, iv_n); + crypto_cipher_get_iv(esp->cipher.tfm, esp->cipher.iv, iv_n); + } + exit: + return err; +} + +/** Decrypt skb. Skips esp header and iv. + * Assumes skb->data points at esp header. + * + * @param esp esp state + * @parm esph esp header + * @param skb packet + * @param head_n size of esp header and iv + * @param iv_n size of iv + * @param text_n size of ciphertext + * @return 0 on success, error code otherwise + */ +static int esp_sa_decrypt(ESPState *esp, ESPHdr *esph, struct sk_buff *skb, + int head_n, int iv_n, int text_n){ + int err = 0; + int sg_n = skb_shinfo(skb)->nr_frags + 1; + struct scatterlist sg[sg_n]; + + err = skb_scatterlist(skb, sg, &sg_n, head_n, text_n); + if(err) goto exit; + if(iv_n){ + crypto_cipher_set_iv(esp->cipher.tfm, esph->data, iv_n); + } + crypto_cipher_decrypt(esp->cipher.tfm, sg, sg, text_n); + exit: + return err; +} + +/** Compute icv. Includes esp header, iv and ciphertext. + * Assumes skb->data points at esp header. 
+ * + * @param esp esp state + * @param skb packet + * @param digest_n number of bytes to digest + * @param icv_n size of icv + * @return 0 on success, error code otherwise + */ +static int esp_sa_digest(ESPState *esp, struct sk_buff *skb, int digest_n, int icv_n){ + int err = 0; + u8 icv[icv_n]; + + if(DEBUG_ICV){ + dprintf("> skb digest_n=%d icv_n=%d\n", digest_n, icv_n); + skb_print_bits(skb, 0, digest_n); + } + memset(icv, 0, icv_n); + esp->digest.icv(esp, skb, 0, digest_n, icv); + skb_put_bits(skb, digest_n, icv, icv_n); + return err; +} + +/** Check the icv and trim it from the skb tail. + * + * @param sa sa state + * @param esp esp state + * @param esph esp header + * @param skb packet + * @return 0 on success, error code otherwise + */ +static int esp_check_icv(SAState *sa, ESPState *esp, ESPHdr *esph, struct sk_buff *skb){ + int err = 0; + int icv_n = esp->digest.icv_n; + int digest_n = skb->len - icv_n; + u8 icv_skb[icv_n]; + u8 icv_new[icv_n]; + + dprintf(">\n"); + if(DEBUG_ICV){ + dprintf("> skb len=%d digest_n=%d icv_n=%d\n", + skb->len, digest_n, icv_n); + skb_print_bits(skb, 0, skb->len); + } + if(skb_copy_bits(skb, digest_n, icv_skb, icv_n)){ + wprintf("> Error getting icv from skb\n"); + goto exit; + } + esp->digest.icv(esp, skb, 0, digest_n, icv_new); + if(DEBUG_ICV){ + dprintf("> len=%d icv_n=%d", digest_n, icv_n); + printk("\nskb="); buf_print(icv_skb, icv_n); + printk("new="); buf_print(icv_new, icv_n); + } + if(unlikely(memcmp(icv_new, icv_skb, icv_n))){ + wprintf("> ICV check failed!\n"); + err = -EINVAL; + sa->counts.integrity_failures++; + goto exit; + } + skb_trim_tail(skb, icv_n); + exit: + dprintf("< err=%d\n", err); + return err; +} + +/** Send a packet via an ESP SA. 
+ * + * @param sa SA state + * @param skb packet to send + * @param tunnel underlying tunnel + * @return 0 on success, negative error code otherwise + */ +static int esp_sa_send(SAState *sa, struct sk_buff *skb, Tunnel *tunnel){ + int err = 0; + int ip_n; // Size of ip header. + int plaintext_n; // Size of plaintext. + int ciphertext_n; // Size of ciphertext (including padding). + int extra_n; // Extra bytes needed for ciphertext. + int icv_n = 0; // Size of integrity check value (icv). + int iv_n = 0; // Size of initialization vector (iv). + int head_n; // Size of esp header and iv. + int tail_n; // Size of esp trailer: padding and icv. + ESPState *esp; + ESPHdr *esph; + + dprintf(">\n"); + esp = sa->data; + ip_n = (skb->nh.iph->ihl << 2); + // Assuming skb->data points at ethernet header, exclude ethernet + // header and IP header. + plaintext_n = skb->len - ETH_HLEN - ip_n; + // Add size of padding fields. + ciphertext_n = roundup(plaintext_n + ESP_PAD_N, esp->cipher.block_n); + if(esp->cipher.pad_n > 0){ + ciphertext_n = roundup(ciphertext_n, esp->cipher.pad_n); + } + extra_n = ciphertext_n - plaintext_n; + iv_n = esp->cipher.iv_n; + icv_n = esp->digest.icv_n; + dprintf("> len=%d plaintext=%d ciphertext=%d extra=%d\n", + skb->len, plaintext_n, ciphertext_n, extra_n); + dprintf("> iv=%d icv=%d\n", iv_n, icv_n); + skb_print_bits(skb, 0, skb->len); + + // Add headroom for esp header and iv, tailroom for the ciphertext + // and icv. + head_n = ESP_HDR_N + iv_n; + tail_n = extra_n + icv_n; + err = skb_make_room(&skb, skb, head_n, tail_n); + if(err) goto exit; + dprintf("> skb=%p\n", skb); + // Move the headers up to make space for the esp header. We can + // use memmove() since all this data fits in the skb head. + // todo: Can't assume this anymore? 
+ dprintf("> header push...\n"); + __skb_push(skb, head_n); + if(0 && skb->mac.raw){ + dprintf("> skb->mac=%p\n", skb->mac.raw); + dprintf("> ETH header pull...\n"); + memmove(skb->data, skb->mac.raw, ETH_HLEN); + skb->mac.raw = skb->data; + __skb_pull(skb, ETH_HLEN); + } + dprintf("> IP header pull...\n"); + memmove(skb->data, skb->nh.raw, ip_n); + skb->nh.raw = skb->data; + __skb_pull(skb, ip_n); + esph = (void*)skb->data; + // Add spi and sequence number. + esph->spi = sa->ident.spi; + esph->seq = htonl(++sa->replay.send_seq); + // Insert the padding bytes: extra bytes less the pad fields + // themselves. + dprintf("> esp_sa_pad ...\n"); + esp_sa_pad(skb, icv_n, extra_n, skb->nh.iph->protocol); + if(sa->security & SA_CONF){ + dprintf("> esp_sa_encrypt...\n"); + err = esp_sa_encrypt(esp, esph, skb, head_n, iv_n, ciphertext_n); + if(err) goto exit; + } + if(icv_n){ + dprintf("> esp_sa_digest...\n"); + err = esp_sa_digest(esp, skb, head_n + ciphertext_n, icv_n); + if(err) goto exit; + } + dprintf("> IP header push...\n"); + __skb_push(skb, ip_n); + if(0 && skb->mac.raw){ + dprintf("> ETH header push...\n"); + __skb_push(skb, ETH_HLEN); + } + // Fix ip header. Adjust length field, set protocol, zero + // checksum. + { + // Total packet length (bytes). + int tot_len = ntohs(skb->nh.iph->tot_len); + tot_len += head_n; + tot_len += tail_n; + skb->nh.iph->protocol = IPPROTO_ESP; + skb->nh.iph->tot_len = htons(tot_len); + skb->nh.iph->check = 0; + } + err = Tunnel_send(tunnel, skb); + exit: + dprintf("< err=%d\n", err); + return err; +} + +/** Release an skb context. + * Drops the refcount on the SA. + * + * @param context to free + */ +static void esp_context_free_fn(SkbContext *context){ + SAState *sa; + if(!context) return; + sa = context->data; + if(!sa) return; + context->data = NULL; + SAState_decref(sa); +} + +/** Receive a packet via an ESP SA. + * Does ESP receive processing (check icv, decrypt), strips + * ESP header and re-receives. 
+ *
+ * Once the size check and (if the digest has an icv) the icv check have
+ * passed, the packet is treated as belonging to this SA and the function
+ * returns 1 whatever happens afterwards. Before that point it returns
+ * the error code of the failing check.
+ *
+ * @param sa SA
+ * @param skb packet
+ * @return 1 once the packet has been accepted as belonging to this SA,
+ *         negative error code if it was rejected before that
+ */
+static int esp_sa_recv(SAState *sa, struct sk_buff *skb){
+    int err = -EINVAL;
+    int mine = 0;       // Set once the packet has passed the size and icv checks.
+    int vnet = 0; //todo: fixme - need to record skb vnet somewhere
+    ESPState *esp;
+    ESPHdr *esph;
+    ESPPadding *pad;
+    int block_n;        // Cipher blocksize.
+    int icv_n;          // Size of integrity check value (icv).
+    int iv_n;           // Size of initialization vector (iv).
+    int text_n;         // Size of text (ciphertext or plaintext).
+    int head_n;         // Size of esp header and iv.
+
+    dprintf("> skb=%p\n", skb);
+    // Assumes skb->data points at esp hdr.
+    esph = (void*)skb->data;
+    esp = sa->data;
+    block_n = crypto_tfm_alg_blocksize(esp->cipher.tfm);
+    icv_n = esp->digest.icv_n;
+    iv_n = esp->cipher.iv_n;
+    head_n = ESP_HDR_N + iv_n;
+    text_n = skb->len - head_n - icv_n;
+    // The text must hold at least the pad fields and be whole cipher blocks.
+    if(text_n < ESP_PAD_N || !multipleof(text_n, block_n)){
+        wprintf("> Invalid size: text_n=%d tfm:block_n=%d esp:block_n=%d\n",
+                text_n, block_n, esp->cipher.block_n);
+        goto exit;
+    }
+    if(icv_n){
+        err = esp_check_icv(sa, esp, esph, skb);
+        if(err) goto exit;
+    }
+    // From here on the exit path returns 1.
+    mine = 1;
+    if(sa->security & SA_CONF){
+        err = esp_sa_decrypt(esp, esph, skb, head_n, iv_n, text_n);
+        if(err) goto exit;
+    }
+    // Strip esp header by moving the other headers down.
+    //todo Maybe not safe to do this anymore.
+    memmove(skb->mac.raw + head_n, skb->mac.raw, (skb->data - skb->mac.raw));
+    skb->mac.raw += head_n;
+    skb->nh.raw += head_n;
+    // Move skb->data back to ethernet header.
+    // Do in 2 moves to ensure offsets are +ve,
+    // since args to skb_pull/skb_push are unsigned.
+    __skb_pull(skb, head_n);
+    __skb_push(skb, skb->data - skb->mac.raw);
+    // After this esph is invalid.
+    esph = NULL;
+    // Trim padding, restore protocol in IP header.
+    pad = skb_trim_tail(skb, ESP_PAD_N);
+    text_n -= ESP_PAD_N;
+    // NOTE(review): ESPPadding.pad_n is declared u8 in esp.h, so the
+    // comparison with 255 can never be true, and '|' is a bitwise or;
+    // only the comparison with text_n has any effect here.
+    if((pad->pad_n > 255) | (pad->pad_n > text_n)){
+        wprintf("> Invalid padding: pad_n=%d text_n=%d\n", pad->pad_n, text_n);
+        goto exit;
+    }
+    skb_trim_tail(skb, pad->pad_n);
+    skb->nh.iph->protocol = pad->protocol;
+    err = skb_push_context(skb, vnet, sa->ident.addr, IPPROTO_ESP,
+                           sa, esp_context_free_fn);
+    if(err) goto exit;
+    // Increase sa refcount now the skb context refers to it.
+    SAState_incref(sa);
+    err = netif_rx(skb);
+  exit:
+    // Any error after acceptance is replaced by 1 here.
+    if(mine) err = 1;
+    dprintf("< skb=%p err=%d\n", skb, err);
+    return err;
+}
+
+/** Estimate the packet size for some data using ESP processing.
+ *
+ * Adds the pad fields, rounds up to the cipher block size (and to the
+ * pad size if one is set), then adds the icv and the fixed esp header.
+ *
+ * @param sa ESP SA
+ * @param data_n data size
+ * @return size after ESP processing
+ */
+static u32 esp_sa_size(SAState *sa, int data_n){
+    // Even in transport mode have to round up to blocksize.
+    // Have to add some padding for alignment even if pad_n is zero.
+    ESPState *esp = sa->data;
+
+    data_n = roundup(data_n + ESP_PAD_N, esp->cipher.block_n);
+    if(esp->cipher.pad_n > 0){
+        data_n = roundup(data_n, esp->cipher.pad_n);
+    }
+    data_n += esp->digest.icv_n;
+    // NOTE(review): the iv is left out of the estimate, although
+    // esp_sa_send() makes ESP_HDR_N + iv_n bytes of headroom - confirm
+    // this is intended.
+    //data_n += esp->cipher.iv_n;
+    data_n += ESP_HDR_N;
+    return data_n;
+}
+
+/** Compute an icv using HMAC digest.
+ * + * @param esp ESP state + * @param skb packet to digest + * @param offset offset to start at + * @param len number of bytes to digest + * @param icv return parameter for ICV + * @return 0 on success, negative error code otherwise + */ +static inline void esp_hmac_digest(ESPState *esp, struct sk_buff *skb, + int offset, int len, u8 *icv){ + int err = 0; + struct crypto_tfm *digest = esp->digest.tfm; + char *icv_tmp = esp->digest.icv_tmp; + int sg_n = skb_shinfo(skb)->nr_frags + 1; + struct scatterlist sg[sg_n]; + + dprintf("> offset=%d len=%d\n", offset, len); + memset(icv, 0, esp->digest.icv_n); + if(DEBUG_ICV){ + dprintf("> key len=%d\n", esp->digest.key_n); + printk("\nkey="); + buf_print(esp->digest.key,esp->digest.key_n); + } + crypto_hmac_init(digest, esp->digest.key, &esp->digest.key_n); + err = skb_scatterlist(skb, sg, &sg_n, offset, len); + crypto_hmac_update(digest, sg, sg_n); + crypto_hmac_final(digest, esp->digest.key, &esp->digest.key_n, icv_tmp); + if(DEBUG_ICV){ + dprintf("> digest len=%d ", esp->digest.icv_n); + printk("\nval="); + buf_print(icv_tmp, esp->digest.icv_n); + } + memcpy(icv, icv_tmp, esp->digest.icv_n); + dprintf("<\n"); +} + +/** Finish up an esp state. + * Releases the digest, cipher, iv and frees the state. + * + * @parma esp state + */ +static void esp_fini(ESPState *esp){ + if(!esp) return; + if(esp->digest.tfm){ + crypto_free_tfm(esp->digest.tfm); + esp->digest.tfm = NULL; + } + if(esp->digest.icv_tmp){ + kfree(esp->digest.icv_tmp); + esp->digest.icv_tmp = NULL; + } + if(esp->cipher.tfm){ + crypto_free_tfm(esp->cipher.tfm); + esp->cipher.tfm = NULL; + } + if(esp->cipher.iv){ + kfree(esp->cipher.iv); + esp->cipher.iv = NULL; + } + kfree(esp); +} + +/** Release an ESP SA. + * + * @param sa ESO SA + */ +static void esp_sa_fini(SAState *sa){ + ESPState *esp; + if(!sa) return; + esp = sa->data; + if(!esp) return; + esp_fini(esp); + sa->data = NULL; +} + +/** Initialize the cipher for an ESP SA. 
+ * + * @param sa ESP SA + * @param esp ESP state + * @return 0 on success, negative error code otherwise + */ +static int esp_cipher_init(SAState *sa, ESPState *esp){ + int err = 0; + SAAlgorithm *algo = NULL; + int cipher_mode = CRYPTO_TFM_MODE_CBC; + + dprintf("> sa=%p esp=%p\n", sa, esp); + dprintf("> cipher=%s\n", sa->cipher.name); + algo = sa_cipher_by_name(sa->cipher.name); + if(!algo){ + wprintf("> Cipher unavailable: %s\n", sa->cipher.name); + err = -EINVAL; + goto exit; + } + esp->cipher.key_n = roundup(sa->cipher.bits, 8); + // If cipher is null must use ECB because CBC algo does not support blocksize 1. + if(strcmp(sa->cipher.name, "cipher_null")){ + cipher_mode = CRYPTO_TFM_MODE_ECB; + } + esp->cipher.tfm = crypto_alloc_tfm(sa->cipher.name, cipher_mode); + if(!esp->cipher.tfm){ + err = -ENOMEM; + goto exit; + } + esp->cipher.block_n = roundup(crypto_tfm_alg_blocksize(esp->cipher.tfm), 4); + esp->cipher.iv_n = crypto_tfm_alg_ivsize(esp->cipher.tfm); + esp->cipher.pad_n = 0; + if(esp->cipher.iv_n){ + esp->cipher.iv = kmalloc(esp->cipher.iv_n, GFP_KERNEL); + get_random_bytes(esp->cipher.iv, esp->cipher.iv_n); + } + crypto_cipher_setkey(esp->cipher.tfm, esp->cipher.key, esp->cipher.key_n); + err = 0; + exit: + dprintf("< err=%d\n", err); + return err; +} + +/** Initialize the digest for an ESP SA. 
+ * + * @param sa ESP SA + * @param esp ESP state + * @return 0 on success, negative error code otherwise + */ +static int esp_digest_init(SAState *sa, ESPState *esp){ + int err = 0; + SAAlgorithm *algo = NULL; + + dprintf(">\n"); + esp->digest.key = sa->digest.key; + esp->digest.key_n = bits_to_bytes(roundup(sa->digest.bits, 8)); + esp->digest.tfm = crypto_alloc_tfm(sa->digest.name, 0); + if(!esp->digest.tfm){ + err = -ENOMEM; + goto exit; + } + algo = sa_digest_by_name(sa->digest.name); + if(!algo){ + wprintf("> Digest unavailable: %s\n", sa->digest.name); + err = -EINVAL; + goto exit; + } + esp->digest.icv = esp_hmac_digest; + esp->digest.icv_full_n = bits_to_bytes(algo->info.digest.icv_fullbits); + esp->digest.icv_n = bits_to_bytes(algo->info.digest.icv_truncbits); + + if(esp->digest.icv_full_n != crypto_tfm_alg_digestsize(esp->digest.tfm)){ + err = -EINVAL; + wprintf("> digest %s, size %u != %hu\n", + sa->digest.name, + crypto_tfm_alg_digestsize(esp->digest.tfm), + esp->digest.icv_full_n); + goto exit; + } + + esp->digest.icv_tmp = kmalloc(esp->digest.icv_full_n, GFP_KERNEL); + if(!esp->digest.icv_tmp){ + err = -ENOMEM; + goto exit; + } + exit: + dprintf("< err=%d\n", err); + return err; +} + +/** Initialize an ESP SA. + * + * @param sa ESP SA + * @param args arguments + * @return 0 on success, negative error code otherwise + */ +static int esp_sa_init(SAState *sa, void *args){ + int err = 0; + ESPState *esp = NULL; + + dprintf("> sa=%p\n", sa); + esp = kmalloc(sizeof(*esp), GFP_KERNEL); + if(!esp){ + err = -ENOMEM; + goto exit; + } + *esp = (ESPState){}; + err = esp_cipher_init(sa, esp); + if(err) goto exit; + err = esp_digest_init(sa, esp); + if(err) goto exit; + sa->data = esp; + exit: + if(err){ + if(esp) esp_fini(esp); + } + dprintf("< err=%d\n", err); + return err; +} + +/** SA type for ESP. 
+ */ +static SAType esp_sa_type = { + .name = "ESP", + .protocol = IPPROTO_ESP, + .init = esp_sa_init, + .fini = esp_sa_fini, + .size = esp_sa_size, + .recv = esp_sa_recv, + .send = esp_sa_send +}; + +/** Get the ESP header from a packet. + * + * @param skb packet + * @param esph return parameter for header + * @return 0 on success, negative error code otherwise + */ +static int esp_skb_header(struct sk_buff *skb, ESPHdr **esph){ + int err = 0; + if(skb->len < ESP_HDR_N){ + err = -EINVAL; + goto exit; + } + *esph = (ESPHdr*)skb->data; + exit: + return err; +} + +/** Handle an incoming skb with ESP protocol. + * + * Lookup spi, if state found hand to the state. + * If no state, check spi, if ok, create state and pass to it. + * If spi not ok, drop. + * + * @param skb packet + * @return 0 on sucess, negative error code otherwise + */ +static int esp_protocol_recv(struct sk_buff *skb){ + int err = 0; + const int eth_n = ETH_HLEN; + int ip_n; + ESPHdr *esph = NULL; + SAState *sa = NULL; + u32 addr; + + dprintf(">\n"); + dprintf("> recv skb=\n"); skb_print_bits(skb, 0, skb->len); + ip_n = (skb->nh.iph->ihl << 2); + if(skb->data == skb->mac.raw){ + // skb->data points at ethernet header. + if (!pskb_may_pull(skb, eth_n + ip_n)){ + wprintf("> Malformed skb\n"); + err = -EINVAL; + goto exit; + } + skb_pull(skb, eth_n + ip_n); + } + addr = skb->nh.iph->daddr; + err = esp_skb_header(skb, &esph); + if(err) goto exit; + dprintf("> spi=%08x protocol=%d addr=" IPFMT "\n", + esph->spi, IPPROTO_ESP, NIPQUAD(addr)); + sa = sa_table_lookup_spi(esph->spi, IPPROTO_ESP, addr); + if(!sa){ + err = vnet_sa_create(esph->spi, IPPROTO_ESP, addr, &sa); + if(err) goto exit; + } + err = SAState_recv(sa, skb); + exit: + if(sa) SAState_decref(sa); + dprintf("< err=%d\n", err); + return err; +} + +/** Handle an ICMP error related to ESP. 
+ * + * @param skb ICMP error packet + * @param info + */ +static void esp_protocol_icmp_err(struct sk_buff *skb, u32 info){ + struct iphdr *iph = (struct iphdr*)skb->data; + ESPHdr *esph; + SAState *sa; + + dprintf("> ICMP error type=%d code=%d\n", + skb->h.icmph->type, skb->h.icmph->code); + if(skb->h.icmph->type != ICMP_DEST_UNREACH || + skb->h.icmph->code != ICMP_FRAG_NEEDED){ + return; + } + + //todo: need to check skb has enough len to do this. + esph = (ESPHdr*)(skb->data + (iph->ihl << 2)); + sa = sa_table_lookup_spi(esph->spi, IPPROTO_ESP, iph->daddr); + if(!sa) return; + wprintf("> ICMP unreachable on SA ESP spi=%08x addr=" IPFMT "\n", + ntohl(esph->spi), NIPQUAD(iph->daddr)); + SAState_decref(sa); +} + +//============================================================================ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +// Code for 2.6 kernel. + +/** Protocol handler for ESP. + */ +static struct net_protocol esp_protocol = { + .handler = esp_protocol_recv, + .err_handler = esp_protocol_icmp_err +}; + +static int esp_protocol_add(void){ + return inet_add_protocol(&esp_protocol, IPPROTO_ESP); +} + +static int esp_protocol_del(void){ + return inet_del_protocol(&esp_protocol, IPPROTO_ESP); +} + +//============================================================================ +#else +//============================================================================ +// Code for 2.4 kernel. + +/** Protocol handler for ESP. + */ +static struct inet_protocol esp_protocol = { + .name = "ESP", + .protocol = IPPROTO_ESP, + .handler = esp_protocol_recv, + .err_handler = esp_protocol_icmp_err +}; + +static int esp_protocol_add(void){ + inet_add_protocol(&esp_protocol); + return 0; +} + +static int esp_protocol_del(void){ + return inet_del_protocol(&esp_protocol); +} + +#endif +//============================================================================ + + +/** Initialize the ESP module. + * Registers the ESP protocol and SA type. 
+ * + * @return 0 on success, negative error code otherwise + */ +int __init esp_module_init(void){ + int err = 0; + dprintf(">\n"); + err = SAType_add(&esp_sa_type); + if(err < 0){ + eprintf("> Error adding esp sa type\n"); + goto exit; + } + esp_protocol_add(); + exit: + dprintf("< err=%d\n", err); + return err; +} + +/** Finalize the ESP module. + * Deregisters the ESP protocol and SA type. + */ +void __exit esp_module_exit(void){ + if(esp_protocol_del() < 0){ + eprintf("> Error removing esp protocol\n"); + } + if(SAType_del(&esp_sa_type) < 0){ + eprintf("> Error removing esp sa type\n"); + } +} + diff --git a/tools/vnet/vnet-module/esp.h b/tools/vnet/vnet-module/esp.h new file mode 100644 index 0000000000..57d21a9ce7 --- /dev/null +++ b/tools/vnet/vnet-module/esp.h @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef __VNET_ESP_H__ +#define __VNET_ESP_H__ + +#include +#include +#include + +/** Header used by IPSEC ESP (Encapsulated Security Payload). */ +typedef struct ESPHdr { + /** The spi (security parameters index). */ + u32 spi; + /** Sequence number. */ + u32 seq; + /* Variable length data (depends on crypto suite). + Mind the 64 bit alignment! */ + u8 data[0]; +} ESPHdr; + +/** Padding trailer used by IPSEC ESP. 
+ * Follows the padding itself with the padding length and the
+ * protocol being encapsulated.
+ */
+typedef struct ESPPadding {
+    /** Number of pad bytes preceding this trailer. */
+    u8 pad_n;
+    /** Protocol number of the encapsulated payload. */
+    u8 protocol;
+} ESPPadding;
+
+/** Size of the esp header (spi and seq). */
+static const int ESP_HDR_N = sizeof(ESPHdr);
+
+/** Size of the esp pad and next protocol field. */
+static const int ESP_PAD_N = sizeof(ESPPadding);
+
+/* SA state values.
+ * NOTE(review): nothing in esp.c refers to these - presumably used by
+ * the SA code; confirm where they belong.
+ */
+enum {
+    SASTATE_VOID,
+    SASTATE_ACQUIRE,
+    SASTATE_VALID,
+    SASTATE_ERROR,
+    SASTATE_EXPIRED,
+    SASTATE_DEAD,
+};
+
+/* Forward declaration: ESPDigest's icv function takes an ESPState. */
+struct ESPState;
+
+/** A cipher instance. */
+typedef struct ESPCipher {
+    /** Cipher key. */
+    u8 *key;
+    /** Key size (bytes). */
+    int key_n;
+    /** Initialization vector (IV). */
+    u8 *iv;
+    /** IV size (bytes). */
+    int iv_n;
+    /** Block size for padding (bytes). */
+    int pad_n;
+    /** Cipher block size (bytes). */
+    int block_n;
+    /** Cipher crypto transform. */
+    struct crypto_tfm *tfm;
+} ESPCipher;
+
+/** A digest instance. */
+typedef struct ESPDigest {
+    /** Digest key. */
+    u8 *key;
+    /** Key size (bytes) */
+    int key_n;
+    /** ICV size used (bytes). */
+    u8 icv_n;
+    /** Full ICV size when computed (bytes). */
+    u8 icv_full_n;
+    /** Working storage for computing ICV. */
+    u8 *icv_tmp;
+    /** Function used to compute ICV (e.g. HMAC).
+     * Digests len bytes of skb starting at offset and writes the
+     * result to icv.
+     */
+    void (*icv)(struct ESPState *esp,
+                struct sk_buff *skb,
+                int offset,
+                int len,
+                u8 *icv);
+    /** Digest crypto transform (e.g. SHA).
*/ + struct crypto_tfm *tfm; +} ESPDigest; + +typedef struct ESPState { + struct ESPCipher cipher; + struct ESPDigest digest; +} ESPState; + +extern int esp_module_init(void); +extern void esp_module_exit(void); + +#endif /* !__VNET_ESP_H__ */ diff --git a/tools/vnet/vnet-module/etherip.c b/tools/vnet/vnet-module/etherip.c new file mode 100644 index 0000000000..05486ed5a2 --- /dev/null +++ b/tools/vnet/vnet-module/etherip.c @@ -0,0 +1,411 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define MODULE_NAME "VNET" +//#define DEBUG 1 +#undef DEBUG +#include "debug.h" + +/** @file Etherip implementation. + * The etherip protocol is used to transport Ethernet frames in IP packets. + */ + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +#define MAC_ETH(_skb) ((struct ethhdr *)(_skb)->mac.raw) +#else +#define MAC_ETH(_skb) ((_skb)->mac.ethernet) +#endif + +/** Get the vnet label from an etherip header. 
+ * + * @param hdr header + * @return vnet (in host order) + */ +int etheriphdr_get_vnet(struct etheriphdr *hdr){ +#ifdef CONFIG_ETHERIP_EXT + return ntohl(hdr->vnet); +#else + return hdr->reserved; +#endif +} + +/** Set the vnet label in an etherip header. + * Also sets the etherip version. + * + * @param hdr header + * @param vnet vnet label (in host order) + */ +void etheriphdr_set_vnet(struct etheriphdr *hdr, int vnet){ +#ifdef CONFIG_ETHERIP_EXT + hdr->version = 4; + hdr->vnet = htonl(vnet); +#else + hdr->version = 3; + hdr->reserved = vnet & 0x0fff; +#endif +} + +/** Open an etherip tunnel. + * + * @param tunnel to open + * @return 0 on success, error code otherwise + */ +static int etherip_tunnel_open(Tunnel *tunnel){ + return 0; +} + +/** Close an etherip tunnel. + * + * @param tunnel to close + */ +static void etherip_tunnel_close(Tunnel *tunnel){ +} + + +/** Send a packet via an etherip tunnel. + * Adds etherip header, new ip header, new ethernet header around + * ethernet frame. + * + * @param tunnel tunnel + * @param skb packet + * @return 0 on success, error code otherwise + */ +static int etherip_tunnel_send(Tunnel *tunnel, struct sk_buff *skb){ + int err = 0; + const int etherip_n = sizeof(struct etheriphdr); + const int ip_n = sizeof(struct iphdr); + const int eth_n = ETH_HLEN; + int head_n = 0; + int vnet = tunnel->key.vnet; + struct etheriphdr *etheriph; + struct ethhdr *ethh; + u32 saddr = 0; + + dprintf("> skb=%p vnet=%d\n", skb, vnet); + head_n = etherip_n + ip_n + eth_n; + err = skb_make_room(&skb, skb, head_n, 0); + if(err) goto exit; + + //err = vnet_get_device_address(skb->dev, &saddr); + //if(err) goto exit; + + // The original ethernet header. + ethh = MAC_ETH(skb); + //print_skb_data(__FUNCTION__, 0, skb, skb->mac.raw, skb->len); + // Null the pointer as we are pushing a new IP header. + skb->mac.raw = NULL; + + // Setup the etherip header. 
+ //dprintf("> push etherip header...\n"); + etheriph = (struct etheriphdr *)skb_push(skb, etherip_n); + etheriphdr_set_vnet(etheriph, vnet); + + // Setup the IP header. + //dprintf("> push IP header...\n"); + skb->nh.raw = skb_push(skb, ip_n); + skb->nh.iph->version = 4; // Standard version. + skb->nh.iph->ihl = ip_n / 4; // IP header length (32-bit words). + skb->nh.iph->tos = 0; // No special type-of-service. + skb->nh.iph->tot_len = htons(skb->len); // Total packet length (bytes). + skb->nh.iph->id = 0; // No flow id (since no frags). + skb->nh.iph->frag_off = htons(IP_DF); // Don't fragment - can't handle frags. + skb->nh.iph->ttl = 64; // Linux default time-to-live. + skb->nh.iph->protocol = IPPROTO_ETHERIP; // IP protocol number. + skb->nh.iph->saddr = saddr; // Source address. + skb->nh.iph->daddr = tunnel->key.addr; // Destination address. + skb->nh.iph->check = 0; + + // Ethernet header will be filled-in by device. + err = Tunnel_send(tunnel->base, skb); + skb = NULL; + exit: + if(err && skb) dev_kfree_skb(skb); + //dprintf("< err=%d\n", err); + return err; +} + +/** Tunnel type for etherip. + */ +static TunnelType _etherip_tunnel_type = { + .name = "ETHERIP", + .open = etherip_tunnel_open, + .close = etherip_tunnel_close, + .send = etherip_tunnel_send +}; + +TunnelType *etherip_tunnel_type = &_etherip_tunnel_type; + +/* Defeat compiler warnings about unused functions. */ +static void print_str(char *s, int n) __attribute__((unused)); + +static void print_str(char *s, int n) { + int i; + + for(i=0; i", (unsigned)(0xff & *s)); + } + } + printk("\n"); +} + +/** Do etherip receive processing. + * Strips etherip header to extract the ethernet frame, sets + * the vnet from the header and re-receives the frame. 
+ * + * @param skb packet + * @return 0 on success, error code otherwise + */ +static int etherip_protocol_recv(struct sk_buff *skb){ + int err = 0; + int mine = 0; + const int eth_n = ETH_HLEN; + int ip_n; + const int etherip_n = sizeof(struct etheriphdr); + struct etheriphdr *etheriph; + struct ethhdr *ethhdr; + Vnet *vinfo = NULL; + u32 vnet; + + ethhdr = MAC_ETH(skb); + if(MULTICAST(skb->nh.iph->daddr) && + (skb->nh.iph->daddr != varp_mcast_addr)){ + // Ignore multicast packets not addressed to us. + dprintf("> dst=%u.%u.%u.%u varp_mcast_addr=%u.%u.%u.%u\n", + NIPQUAD(skb->nh.iph->daddr), + NIPQUAD(varp_mcast_addr)); + goto exit; + } + ip_n = (skb->nh.iph->ihl << 2); + if(skb->data == skb->mac.raw){ + // skb->data points at ethernet header. + //dprintf("> len=%d\n", skb->len); + if (!pskb_may_pull(skb, eth_n + ip_n)){ + wprintf("> Malformed skb\n"); + err = -EINVAL; + goto exit; + } + skb_pull(skb, eth_n + ip_n); + } + // Assume skb->data points at etherip header. + etheriph = (void*)skb->data; + if(!pskb_may_pull(skb, etherip_n)){ + wprintf("> Malformed skb\n"); + err = -EINVAL; + goto exit; + } + vnet = etheriphdr_get_vnet(etheriph); + dprintf("> Rcvd skb=%p vnet=%d\n", skb, vnet); + // If vnet is secure, context must include IPSEC ESP. + err = vnet_check_context(vnet, SKB_CONTEXT(skb), &vinfo); + Vnet_decref(vinfo); + if(err){ + wprintf("> Failed security check\n"); + goto exit; + } + mine = 1; + // Point at the headers in the contained ethernet frame. + skb->mac.raw = skb_pull(skb, etherip_n); + + // Know source ip, vnet, vmac, so could update varp cache. + // But if traffic comes to us over a vnetd tunnel this points the coa + // at the vnetd rather than the endpoint. So don't do it. + //varp_update(htonl(vnet), MAC_ETH(skb)->h_source, skb->nh.iph->saddr); + + // Assuming a standard Ethernet frame. 
+ skb->nh.raw = skb_pull(skb, ETH_HLEN); + +#ifdef CONFIG_NETFILTER +#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) + // This stops our new pkt header being clobbered by a subsequent + // call to nf_bridge_maybe_copy_header. Just replicate the + // corresponding nf_bridge_save_header. + if(skb->nf_bridge){ + int header_size = 16; + if(MAC_ETH(skb)->h_proto == __constant_htons(ETH_P_8021Q)) { + header_size = 18; + } + memcpy(skb->nf_bridge->data, skb->data - header_size, header_size); + } +#endif +#endif + + if(1){ + struct ethhdr *eth = MAC_ETH(skb); + // Devices use eth_type_trans() to set skb->pkt_type and skb->protocol. + // Set them from contained ethhdr, or leave as received? + // 'Ware use of hard_header_len in eth_type_trans(). + + //skb->protocol = htons(ETH_P_IP); + + if(ntohs(eth->h_proto) >= 1536){ + skb->protocol = eth->h_proto; + } else { + skb->protocol = htons(ETH_P_802_2); + } + + if(mac_is_multicast(eth->h_dest)){ + if(mac_is_broadcast(eth->h_dest)){ + skb->pkt_type = PACKET_BROADCAST; + } else { + skb->pkt_type = PACKET_MULTICAST; + } + } else { + skb->pkt_type = PACKET_HOST; + } + + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + if (skb->ip_summed == CHECKSUM_HW){ + skb->ip_summed = CHECKSUM_NONE; + //skb->csum = csum_sub(skb->csum, + // csum_partial(skb->mac.raw, skb->nh.raw - skb->mac.raw, 0)); + } + dst_release(skb->dst); + skb->dst = NULL; +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug = 0; +#endif +#endif + } + + //print_skb_data(__FUNCTION__, 0, skb, skb->mac.raw, skb->len + ETH_HLEN); + + err = vnet_skb_recv(skb, vnet, (Vmac*)MAC_ETH(skb)->h_dest); + exit: + if(mine) err = 1; + dprintf("< skb=%p err=%d\n", skb, err); + return err; +} + +/** Handle an ICMP error related to etherip. 
+ * + * @param skb ICMP error packet + * @param info + */ +static void etherip_protocol_icmp_err(struct sk_buff *skb, u32 info){ + struct iphdr *iph = (struct iphdr*)skb->data; + + wprintf("> ICMP error type=%d code=%d addr=" IPFMT "\n", + skb->h.icmph->type, skb->h.icmph->code, NIPQUAD(iph->daddr)); + + if (skb->h.icmph->type != ICMP_DEST_UNREACH || + skb->h.icmph->code != ICMP_FRAG_NEEDED){ + return; + } + wprintf("> MTU too big addr= " IPFMT "\n", NIPQUAD(iph->daddr)); +} + +//============================================================================ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +// Code for 2.6 kernel. + +/** Etherip protocol. */ +static struct net_protocol etherip_protocol = { + .handler = etherip_protocol_recv, + .err_handler = etherip_protocol_icmp_err, +}; + +static int etherip_protocol_add(void){ + return inet_add_protocol(ðerip_protocol, IPPROTO_ETHERIP); +} + +static int etherip_protocol_del(void){ + return inet_del_protocol(ðerip_protocol, IPPROTO_ETHERIP); +} + +//============================================================================ +#else +//============================================================================ +// Code for 2.4 kernel. + +/** Etherip protocol. */ +static struct inet_protocol etherip_protocol = { + .name = "ETHERIP", + .protocol = IPPROTO_ETHERIP, + .handler = etherip_protocol_recv, + .err_handler = etherip_protocol_icmp_err, +}; + +static int etherip_protocol_add(void){ + inet_add_protocol(ðerip_protocol); + return 0; +} + +static int etherip_protocol_del(void){ + return inet_del_protocol(ðerip_protocol); +} + +#endif +//============================================================================ + + +/** Initialize the etherip module. + * Registers the etherip protocol. + * + * @return 0 on success, error code otherwise + */ +int __init etherip_module_init(void) { + int err = 0; + etherip_protocol_add(); + return err; +} + +/** Finalize the etherip module. + * Deregisters the etherip protocol. 
+ */ +void __exit etherip_module_exit(void) { + if(etherip_protocol_del() < 0){ + printk(KERN_INFO "%s: can't remove etherip protocol\n", __FUNCTION__); + } +} diff --git a/tools/vnet/vnet-module/etherip.h b/tools/vnet/vnet-module/etherip.h new file mode 100644 index 0000000000..e8c23c2c2f --- /dev/null +++ b/tools/vnet/vnet-module/etherip.h @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _VNET_ETHERIP_H_ +#define _VNET_ETHERIP_H_ + +#include "if_etherip.h" + +extern int etherip_module_init(void); +extern void etherip_module_exit(void); + +#endif diff --git a/tools/vnet/vnet-module/if_etherip.h b/tools/vnet/vnet-module/if_etherip.h new file mode 100644 index 0000000000..272c345d78 --- /dev/null +++ b/tools/vnet/vnet-module/if_etherip.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _VNET_IF_ETHERIP_H_ +#define _VNET_IF_ETHERIP_H_ +/*----------------------------------------------------------------------------*/ +#ifdef CONFIG_ETHERIP_EXT +struct etheriphdr { + __u8 version; + __u32 vnet; +} __attribute__ ((packed)); + +/*----------------------------------------------------------------------------*/ +#else +struct etheriphdr +{ +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 reserved:12, + version:4; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u16 version:4, + reserved:12; +#else +#error "Please fix " +#endif + +}; +#endif + +#ifndef IPPROTO_ETHERIP +#define IPPROTO_ETHERIP 97 +#endif + +/*----------------------------------------------------------------------------*/ + +#endif /* ! _VNET_IF_ETHERIP_H_ */ diff --git a/tools/vnet/vnet-module/if_varp.h b/tools/vnet/vnet-module/if_varp.h new file mode 100644 index 0000000000..49058471db --- /dev/null +++ b/tools/vnet/vnet-module/if_varp.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _VNET_IF_VARP_H +#define _VNET_IF_VARP_H + +typedef struct Vmac { + unsigned char mac[ETH_ALEN]; +} Vmac; + +enum { + VARP_ID = 1, + VARP_OP_REQUEST = 1, + VARP_OP_ANNOUNCE = 2, +}; + +typedef struct VnetMsgHdr { + uint16_t id; + uint16_t opcode; +} __attribute__((packed)) VnetMsgHdr; + +typedef struct VarpHdr { + VnetMsgHdr; + uint32_t vnet; + Vmac vmac; + uint32_t addr; +} __attribute__((packed)) VarpHdr; + +/** Default address for varp/vnet broadcasts: 224.10.0.1 */ +#define VARP_MCAST_ADDR 0xe00a0001 + +/** UDP port to use for varp protocol. */ +#define VARP_PORT 1798 + + + +#endif /* ! _VNET_IF_VARP_H */ diff --git a/tools/vnet/vnet-module/linux/pfkeyv2.h b/tools/vnet/vnet-module/linux/pfkeyv2.h new file mode 100644 index 0000000000..cf3a2f1622 --- /dev/null +++ b/tools/vnet/vnet-module/linux/pfkeyv2.h @@ -0,0 +1,329 @@ +/* PF_KEY user interface, this is defined by rfc2367 so + * do not make arbitrary modifications or else this header + * file will not be compliant. 
+ */ + +#ifndef _LINUX_PFKEY2_H +#define _LINUX_PFKEY2_H + +#include + +#define PF_KEY_V2 2 +#define PFKEYV2_REVISION 199806L + +struct sadb_msg { + uint8_t sadb_msg_version; + uint8_t sadb_msg_type; + uint8_t sadb_msg_errno; + uint8_t sadb_msg_satype; + uint16_t sadb_msg_len; + uint16_t sadb_msg_reserved; + uint32_t sadb_msg_seq; + uint32_t sadb_msg_pid; +} __attribute__((packed)); +/* sizeof(struct sadb_msg) == 16 */ + +struct sadb_ext { + uint16_t sadb_ext_len; + uint16_t sadb_ext_type; +} __attribute__((packed)); +/* sizeof(struct sadb_ext) == 4 */ + +struct sadb_sa { + uint16_t sadb_sa_len; + uint16_t sadb_sa_exttype; + uint32_t sadb_sa_spi; + uint8_t sadb_sa_replay; + uint8_t sadb_sa_state; + uint8_t sadb_sa_auth; + uint8_t sadb_sa_encrypt; + uint32_t sadb_sa_flags; +} __attribute__((packed)); +/* sizeof(struct sadb_sa) == 16 */ + +struct sadb_lifetime { + uint16_t sadb_lifetime_len; + uint16_t sadb_lifetime_exttype; + uint32_t sadb_lifetime_allocations; + uint64_t sadb_lifetime_bytes; + uint64_t sadb_lifetime_addtime; + uint64_t sadb_lifetime_usetime; +} __attribute__((packed)); +/* sizeof(struct sadb_lifetime) == 32 */ + +struct sadb_address { + uint16_t sadb_address_len; + uint16_t sadb_address_exttype; + uint8_t sadb_address_proto; + uint8_t sadb_address_prefixlen; + uint16_t sadb_address_reserved; +} __attribute__((packed)); +/* sizeof(struct sadb_address) == 8 */ + +struct sadb_key { + uint16_t sadb_key_len; + uint16_t sadb_key_exttype; + uint16_t sadb_key_bits; + uint16_t sadb_key_reserved; +} __attribute__((packed)); +/* sizeof(struct sadb_key) == 8 */ + +struct sadb_ident { + uint16_t sadb_ident_len; + uint16_t sadb_ident_exttype; + uint16_t sadb_ident_type; + uint16_t sadb_ident_reserved; + uint64_t sadb_ident_id; +} __attribute__((packed)); +/* sizeof(struct sadb_ident) == 16 */ + +struct sadb_sens { + uint16_t sadb_sens_len; + uint16_t sadb_sens_exttype; + uint32_t sadb_sens_dpd; + uint8_t sadb_sens_sens_level; + uint8_t sadb_sens_sens_len; + 
uint8_t sadb_sens_integ_level; + uint8_t sadb_sens_integ_len; + uint32_t sadb_sens_reserved; +} __attribute__((packed)); +/* sizeof(struct sadb_sens) == 16 */ + +/* followed by: + uint64_t sadb_sens_bitmap[sens_len]; + uint64_t sadb_integ_bitmap[integ_len]; */ + +struct sadb_prop { + uint16_t sadb_prop_len; + uint16_t sadb_prop_exttype; + uint8_t sadb_prop_replay; + uint8_t sadb_prop_reserved[3]; +} __attribute__((packed)); +/* sizeof(struct sadb_prop) == 8 */ + +/* followed by: + struct sadb_comb sadb_combs[(sadb_prop_len + + sizeof(uint64_t) - sizeof(struct sadb_prop)) / + sizeof(strut sadb_comb)]; */ + +struct sadb_comb { + uint8_t sadb_comb_auth; + uint8_t sadb_comb_encrypt; + uint16_t sadb_comb_flags; + uint16_t sadb_comb_auth_minbits; + uint16_t sadb_comb_auth_maxbits; + uint16_t sadb_comb_encrypt_minbits; + uint16_t sadb_comb_encrypt_maxbits; + uint32_t sadb_comb_reserved; + uint32_t sadb_comb_soft_allocations; + uint32_t sadb_comb_hard_allocations; + uint64_t sadb_comb_soft_bytes; + uint64_t sadb_comb_hard_bytes; + uint64_t sadb_comb_soft_addtime; + uint64_t sadb_comb_hard_addtime; + uint64_t sadb_comb_soft_usetime; + uint64_t sadb_comb_hard_usetime; +} __attribute__((packed)); +/* sizeof(struct sadb_comb) == 72 */ + +struct sadb_supported { + uint16_t sadb_supported_len; + uint16_t sadb_supported_exttype; + uint32_t sadb_supported_reserved; +} __attribute__((packed)); +/* sizeof(struct sadb_supported) == 8 */ + +/* followed by: + struct sadb_alg sadb_algs[(sadb_supported_len + + sizeof(uint64_t) - sizeof(struct sadb_supported)) / + sizeof(struct sadb_alg)]; */ + +struct sadb_alg { + uint8_t sadb_alg_id; + uint8_t sadb_alg_ivlen; + uint16_t sadb_alg_minbits; + uint16_t sadb_alg_maxbits; + uint16_t sadb_alg_reserved; +} __attribute__((packed)); +/* sizeof(struct sadb_alg) == 8 */ + +struct sadb_spirange { + uint16_t sadb_spirange_len; + uint16_t sadb_spirange_exttype; + uint32_t sadb_spirange_min; + uint32_t sadb_spirange_max; + uint32_t 
sadb_spirange_reserved; +} __attribute__((packed)); +/* sizeof(struct sadb_spirange) == 16 */ + +struct sadb_x_kmprivate { + uint16_t sadb_x_kmprivate_len; + uint16_t sadb_x_kmprivate_exttype; + u_int32_t sadb_x_kmprivate_reserved; +} __attribute__((packed)); +/* sizeof(struct sadb_x_kmprivate) == 8 */ + +struct sadb_x_sa2 { + uint16_t sadb_x_sa2_len; + uint16_t sadb_x_sa2_exttype; + uint8_t sadb_x_sa2_mode; + uint8_t sadb_x_sa2_reserved1; + uint16_t sadb_x_sa2_reserved2; + uint32_t sadb_x_sa2_sequence; + uint32_t sadb_x_sa2_reqid; +} __attribute__((packed)); +/* sizeof(struct sadb_x_sa2) == 16 */ + +struct sadb_x_policy { + uint16_t sadb_x_policy_len; + uint16_t sadb_x_policy_exttype; + uint16_t sadb_x_policy_type; + uint8_t sadb_x_policy_dir; + uint8_t sadb_x_policy_reserved; + uint32_t sadb_x_policy_id; + uint32_t sadb_x_policy_reserved2; +} __attribute__((packed)); +/* sizeof(struct sadb_x_policy) == 16 */ + +struct sadb_x_ipsecrequest { + uint16_t sadb_x_ipsecrequest_len; + uint16_t sadb_x_ipsecrequest_proto; + uint8_t sadb_x_ipsecrequest_mode; + uint8_t sadb_x_ipsecrequest_level; + uint16_t sadb_x_ipsecrequest_reqid; +} __attribute__((packed)); +/* sizeof(struct sadb_x_ipsecrequest) == 16 */ + +/* This defines the TYPE of Nat Traversal in use. 
Currently only one + * type of NAT-T is supported, draft-ietf-ipsec-udp-encaps-06 + */ +struct sadb_x_nat_t_type { + uint16_t sadb_x_nat_t_type_len; + uint16_t sadb_x_nat_t_type_exttype; + uint8_t sadb_x_nat_t_type_type; + uint8_t sadb_x_nat_t_type_reserved[3]; +} __attribute__((packed)); +/* sizeof(struct sadb_x_nat_t_type) == 8 */ + +/* Pass a NAT Traversal port (Source or Dest port) */ +struct sadb_x_nat_t_port { + uint16_t sadb_x_nat_t_port_len; + uint16_t sadb_x_nat_t_port_exttype; + uint16_t sadb_x_nat_t_port_port; + uint16_t sadb_x_nat_t_port_reserved; +} __attribute__((packed)); +/* sizeof(struct sadb_x_nat_t_port) == 8 */ + +/* Message types */ +#define SADB_RESERVED 0 +#define SADB_GETSPI 1 +#define SADB_UPDATE 2 +#define SADB_ADD 3 +#define SADB_DELETE 4 +#define SADB_GET 5 +#define SADB_ACQUIRE 6 +#define SADB_REGISTER 7 +#define SADB_EXPIRE 8 +#define SADB_FLUSH 9 +#define SADB_DUMP 10 +#define SADB_X_PROMISC 11 +#define SADB_X_PCHANGE 12 +#define SADB_X_SPDUPDATE 13 +#define SADB_X_SPDADD 14 +#define SADB_X_SPDDELETE 15 +#define SADB_X_SPDGET 16 +#define SADB_X_SPDACQUIRE 17 +#define SADB_X_SPDDUMP 18 +#define SADB_X_SPDFLUSH 19 +#define SADB_X_SPDSETIDX 20 +#define SADB_X_SPDEXPIRE 21 +#define SADB_X_SPDDELETE2 22 +#define SADB_X_NAT_T_NEW_MAPPING 23 +#define SADB_MAX 23 + +/* Security Association flags */ +#define SADB_SAFLAGS_PFS 1 + +/* Security Association states */ +#define SADB_SASTATE_LARVAL 0 +#define SADB_SASTATE_MATURE 1 +#define SADB_SASTATE_DYING 2 +#define SADB_SASTATE_DEAD 3 +#define SADB_SASTATE_MAX 3 + +/* Security Association types */ +#define SADB_SATYPE_UNSPEC 0 +#define SADB_SATYPE_AH 2 +#define SADB_SATYPE_ESP 3 +#define SADB_SATYPE_RSVP 5 +#define SADB_SATYPE_OSPFV2 6 +#define SADB_SATYPE_RIPV2 7 +#define SADB_SATYPE_MIP 8 +#define SADB_X_SATYPE_IPCOMP 9 +#define SADB_SATYPE_MAX 9 + +/* Authentication algorithms */ +#define SADB_AALG_NONE 0 +#define SADB_AALG_MD5HMAC 2 +#define SADB_AALG_SHA1HMAC 3 +#define 
SADB_X_AALG_SHA2_256HMAC 5 +#define SADB_X_AALG_SHA2_384HMAC 6 +#define SADB_X_AALG_SHA2_512HMAC 7 +#define SADB_X_AALG_RIPEMD160HMAC 8 +#define SADB_X_AALG_NULL 251 /* kame */ +#define SADB_AALG_MAX 251 + +/* Encryption algorithms */ +#define SADB_EALG_NONE 0 +#define SADB_EALG_DESCBC 2 +#define SADB_EALG_3DESCBC 3 +#define SADB_X_EALG_CASTCBC 6 +#define SADB_X_EALG_BLOWFISHCBC 7 +#define SADB_EALG_NULL 11 +#define SADB_X_EALG_AESCBC 12 +#define SADB_EALG_MAX 12 + +/* Compression algorithms */ +#define SADB_X_CALG_NONE 0 +#define SADB_X_CALG_OUI 1 +#define SADB_X_CALG_DEFLATE 2 +#define SADB_X_CALG_LZS 3 +#define SADB_X_CALG_LZJH 4 +#define SADB_X_CALG_MAX 4 + +/* Extension Header values */ +#define SADB_EXT_RESERVED 0 +#define SADB_EXT_SA 1 +#define SADB_EXT_LIFETIME_CURRENT 2 +#define SADB_EXT_LIFETIME_HARD 3 +#define SADB_EXT_LIFETIME_SOFT 4 +#define SADB_EXT_ADDRESS_SRC 5 +#define SADB_EXT_ADDRESS_DST 6 +#define SADB_EXT_ADDRESS_PROXY 7 +#define SADB_EXT_KEY_AUTH 8 +#define SADB_EXT_KEY_ENCRYPT 9 +#define SADB_EXT_IDENTITY_SRC 10 +#define SADB_EXT_IDENTITY_DST 11 +#define SADB_EXT_SENSITIVITY 12 +#define SADB_EXT_PROPOSAL 13 +#define SADB_EXT_SUPPORTED_AUTH 14 +#define SADB_EXT_SUPPORTED_ENCRYPT 15 +#define SADB_EXT_SPIRANGE 16 +#define SADB_X_EXT_KMPRIVATE 17 +#define SADB_X_EXT_POLICY 18 +#define SADB_X_EXT_SA2 19 +/* The next four entries are for setting up NAT Traversal */ +#define SADB_X_EXT_NAT_T_TYPE 20 +#define SADB_X_EXT_NAT_T_SPORT 21 +#define SADB_X_EXT_NAT_T_DPORT 22 +#define SADB_X_EXT_NAT_T_OA 23 +#define SADB_EXT_MAX 23 + +/* Identity Extension values */ +#define SADB_IDENTTYPE_RESERVED 0 +#define SADB_IDENTTYPE_PREFIX 1 +#define SADB_IDENTTYPE_FQDN 2 +#define SADB_IDENTTYPE_USERFQDN 3 +#define SADB_IDENTTYPE_MAX 3 + +#endif /* !(_LINUX_PFKEY2_H) */ diff --git a/tools/vnet/vnet-module/random.c b/tools/vnet/vnet-module/random.c new file mode 100644 index 0000000000..642937c006 --- /dev/null +++ b/tools/vnet/vnet-module/random.c @@ -0,0 +1,101 @@ 
+/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include + +#include "hash_table.h" + +#define MODULE_NAME "RANDOM" +#define DEBUG 1 +#undef DEBUG +#include "debug.h" + +/** @file + * Source of randomness. + * Current implementation is not enough. + * Needs to be cryptographically strong. + */ + +static unsigned long seed = 0; +static unsigned long count = 0; + +static unsigned long stir(unsigned long *a, unsigned long b){ + pseudo_des(a, &b); + return b; +} + +/** Get one random byte. + * + * @return random byte + */ +int get_random_byte(void){ + return stir(&seed, ++count); +} + +#if 0 +/* Get some random bytes. + * + * @param dst destination for the bytes + * @param dst_n number of bytes to get + */ +void get_random_bytes(void *dst, int dst_n){ + int i; + char *p = (char *)dst; + for(i = 0; i < dst_n; i++){ + *p++ = get_random_byte(); + } +} +#endif + +/** Contribute a random byte. + * + * @param b byte to contribute + */ +void add_random_byte(int b){ + stir(&seed, ++count); + stir(&seed, b); +} + +/** Contribute some random bytes. 
+ * + * @param src bytes to contribute + * @param src_n number of bytes + */ +void add_random_bytes(const void *src, int src_n){ + int i; + char *p = (char *)src; + for(i = 0; i < src_n; i++){ + add_random_byte(*p++); + } +} + +int __init random_module_init(void){ + int dummy; + int tmp = jiffies; + seed = (unsigned long)&dummy; + add_random_byte(tmp); + return 0; +} + +void __exit random_module_exit(void){ +} + diff --git a/tools/vnet/vnet-module/random.h b/tools/vnet/vnet-module/random.h new file mode 100644 index 0000000000..e1f95f8603 --- /dev/null +++ b/tools/vnet/vnet-module/random.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef __VNET_RANDOM_H__ +#define __VNET_RANDOM_H__ + +extern int get_random_byte(void); +extern void get_random_bytes(void *dst, int dst_n); +extern void add_random_byte(int b); +extern void add_random_bytes(const void *src, int src_n); + +extern int random_module_init(void); +extern void random_module_exit(void); + +#endif /* ! 
__VNET_RANDOM_H__ */ diff --git a/tools/vnet/vnet-module/sa.c b/tools/vnet/vnet-module/sa.c new file mode 100644 index 0000000000..b0aa67e68f --- /dev/null +++ b/tools/vnet/vnet-module/sa.c @@ -0,0 +1,670 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include "hash_table.h" +#include "allocate.h" + +#define MODULE_NAME "IPSEC" +#define DEBUG 1 +#undef DEBUG +#include "debug.h" + +/** @file IPSEC Security Association (SA). + */ + +/** Maximum number of protocols.*/ +#define INET_PROTOCOL_MAX 256 + +/** Table of SA types indexed by protocol. */ +static SAType *sa_type[INET_PROTOCOL_MAX] = {}; + +/** Hash a protocol number. + * + * @param protocol protocol number + * @return hashcode + */ +static inline unsigned char InetProtocol_hash(int protocol){ + return (protocol) & (INET_PROTOCOL_MAX - 1); +} + +/** Register an SA type. + * It is an error if an SA type is already registered for the protocol. 
+ * + * @param type SA type + * @return 0 on success, error code otherwise + */ +int SAType_add(SAType *type){ + int err = -EINVAL; + int hash; + if(!type) goto exit; + hash = InetProtocol_hash(type->protocol); + if(sa_type[hash]) goto exit; + err = 0; + sa_type[hash] = type; + exit: + return err; +} + +/** Deregister an SA type. + * It is an error if no SA type is registered for the protocol. + * + * @param type SA type + * @return 0 on success, error code otherwise + */ +int SAType_del(SAType *type){ + int err = -EINVAL; + int hash; + if(!type) goto exit; + hash = InetProtocol_hash(type->protocol); + if(!sa_type[hash]) goto exit; + err = 0; + sa_type[hash] = NULL; + exit: + return err; +} + +int SAType_get(int protocol, SAType **type){ + int err = -ENOENT; + int hash; + hash = InetProtocol_hash(protocol); + *type = sa_type[hash]; + if(!*type) goto exit; + err = 0; + exit: + return err; +} + +/* Defeat compiler warnings about unused functions. */ +static int sa_key_check(SAKey *key, enum sa_alg_type type) __attribute__((unused)); +static u32 random_spi(void) __attribute__((unused)); +static u32 generate_key(u32 key, u32 offset, u32 spi) __attribute__((unused)); + +/** Check a key has an acceptable length for an algorithm. + * + * @param key key + * @param type algorithm + * @return 0 on success, error code otherwise + */ +static int sa_key_check(SAKey *key, enum sa_alg_type type){ + return 0; +} + +static unsigned long sa_spi_counter = 0; + +/** Generate a random spi. + * Uses a hashed counter. + * + * @return spi + */ +static u32 random_spi(void){ + unsigned long left, right = 0; + u32 spi; + do{ + left = sa_spi_counter++; + pseudo_des(&left, &right); + spi = right; + } while(!spi); + return spi; +} + +/** Mangle some input to generate output. + * This is used to derive spis and keying material from secrets, + * so it probably ought to be cryptographically strong. + * Probably ought to use a good hash (sha1) or cipher (aes). 
+ * + * @param input input values + * @param n number of values + * @return mangled value + */ +static u32 mangle(u32 input[], int n){ + unsigned long left = 0, right = 0; + int i; + for(i=0; i\n"); + spi = mangle(input, 4); + dprintf("< spi=%x\n", spi); + return spi; +} + +/** Generate keying material for a given spi, based on a + * secret. + * + * @param key secret + * @param offset offset + * @param spi spi + * @return keying material + */ +static u32 generate_key(u32 key, u32 offset, u32 spi){ + u32 input[] = { key, offset, spi }; + return mangle(input, 3); +} + +/** Allocate a spi. + * Want to use random ones. + * So check for ones not in use. + * + * When using static keying, both ends need to agree on key. + * How does that work? Also, will suddenly get traffic using a spi, + * and will have to create SA then. Or need to create in advance. + * But can't do that because don't know peers. + * When get message on a spi that doesn't exist - do what? + * Use a spi related to the destination addr and a secret. + * Then receiver can check if spi is ok and create SA on demand. + * Use hash of key, protocol, addr to generate. Then have to check + * for in-use because of potential collisions. Receiver can do the + * same hash and check spi is in usable range. Then derive keys from + * the spi (using another secret). + * + * @param key spi generation key + * @param protocol protocol + * @param addr IP address + * @param spip return parameter for spi + * @return 0 on success, error code otherwise + */ +int sa_spi_alloc(u32 key, u32 protocol, u32 addr, u32 *spip){ + int err = 0; + int i = 0, n = 100; + u32 spi; + for(i = 0; i < n; i++, spi++){ + spi = generate_spi(key, i, protocol, addr); + if(!spi) continue; + if(!sa_table_lookup_spi(spi, protocol, addr)){ + *spip = spi; + goto exit; + } + } + err = -ENOMEM; + exit: + return err; +} + +/** Table of SAs. Indexed by unique id and spi/protocol/addr triple. 
+ */
+static HashTable *sa_table = NULL;
+
+static u32 sa_id = 1;
+
+/** Compute the hashcode under which an SA is indexed by its unique id.
+ *
+ * @param id SA id
+ * @return hashcode
+ */
+static inline Hashcode sa_table_hash_id(u32 id){
+    Hashcode code = hash_ul(id);
+    return code;
+}
+
+/** Compute the hashcode under which an SA is indexed by its
+ * spi/protocol/addr triple.
+ *
+ * @param spi spi
+ * @param protocol protocol
+ * @param addr IP address
+ * @return hashcode
+ */
+static inline Hashcode sa_table_hash_spi(u32 spi, u32 protocol, u32 addr){
+    Hashcode spi_proto = hash_2ul(spi, protocol);
+    Hashcode code = hash_hul(spi_proto, addr);
+    return code;
+}
+
+/** Entry predicate: does the entry hold exactly this SA?
+ *
+ * @param arg contains SA pointer
+ * @param table hashtable
+ * @param entry entry containing SA
+ * @return 1 if it does, 0 otherwise
+ */
+static int sa_table_state_fn(TableArg arg, HashTable *table, HTEntry *entry){
+    if(entry->value == arg.ptr){
+        return 1;
+    }
+    return 0;
+}
+
+/** Entry predicate: does the entry hold an SA with this id?
+ *
+ * @param arg contains SA id
+ * @param table hashtable
+ * @param entry entry containing SA
+ * @return 1 if it does, 0 otherwise
+ */
+static int sa_table_id_fn(TableArg arg, HashTable *table, HTEntry *entry){
+    u32 wanted = arg.ul;
+    SAState *sa = entry->value;
+    if(sa->ident.id != wanted){
+        return 0;
+    }
+    return 1;
+}
+
+/** Entry predicate: does the entry hold an SA with this spi/protocol/addr?
+ *
+ * @param arg contains SAIdent pointer
+ * @param table hashtable
+ * @param entry entry containing SA
+ * @return 1 if it does, 0 otherwise
+ */
+static int sa_table_spi_fn(TableArg arg, HashTable *table, HTEntry *entry){
+    SAIdent *wanted = arg.ptr;
+    SAState *sa = entry->value;
+    if(sa->ident.spi != wanted->spi){
+        return 0;
+    }
+    if(sa->ident.protocol != wanted->protocol){
+        return 0;
+    }
+    if(sa->ident.addr != wanted->addr){
+        return 0;
+    }
+    return 1;
+}
+
+/** Free an SA entry. Decrements the SA refcount and frees the entry.
+ *
+ * @param table containing table
+ * @param entry to free
+ */
+void sa_table_free_fn(HashTable *table, HTEntry *entry){
+    if(!entry) return;
+    if(entry->value){
+        // Each table entry owns one reference on its SA.
+        SAState *state = entry->value;
+        SAState_decref(state);
+    }
+    deallocate(entry);
+}
+
+/** Initialize the SA table.
+ *
+ * @return 0 on success, error code otherwise
+ */
+int sa_table_init(void){
+    int err = 0;
+    sa_table = HashTable_new(0);
+    if(!sa_table){
+        err = -ENOMEM;
+        goto exit;
+    }
+    sa_table->entry_free_fn = sa_table_free_fn;
+
+  exit:
+    return err;
+}
+
+/** Free the SA table.
+ * The table pointer is cleared afterwards so that a stale pointer
+ * is never handed to the hashtable functions.
+ */
+void sa_table_exit(void){
+    HashTable_free(sa_table);
+    sa_table = NULL;
+}
+
+/** Remove an SA from the table.
+ * Removes the entry indexed by id and, if the spi is non-zero and hashes
+ * differently, the entry indexed by spi/protocol/addr.
+ *
+ * @param state SA
+ * @return sum of the values returned by HashTable_remove_entry()
+ *         (presumably the number of entries removed)
+ */
+int sa_table_delete(SAState *state){
+    int count = 0;
+    Hashcode h1, h2;
+    TableArg arg = { .ptr = state };
+    // Remove by id.
+    h1 = sa_table_hash_id(state->ident.id);
+    count += HashTable_remove_entry(sa_table, h1, sa_table_state_fn, arg);
+    // Remove by spi/protocol/addr if spi nonzero.
+    if(!state->ident.spi) goto exit;
+    h2 = sa_table_hash_spi(state->ident.spi, state->ident.protocol, state->ident.addr);
+    // sa_table_add() creates no second entry when the hashes coincide.
+    if(h1 == h2) goto exit;
+    count += HashTable_remove_entry(sa_table, h2, sa_table_state_fn, arg);
+  exit:
+    return count;
+}
+
+/** Add an SA to the table.
+ * The SA is indexed by id and spi/protocol/addr (if the spi is non-zero).
+ * One reference is taken on the SA for each entry created. If the second
+ * entry cannot be added the first is removed again.
+ *
+ * @param state SA
+ * @return 0 on success, error code otherwise
+ */
+int sa_table_add(SAState *state){
+    int err = 0;
+    Hashcode h1, h2;
+    int entries = 0;
+
+    dprintf(">\n");
+    // Index by id.
+    h1 = sa_table_hash_id(state->ident.id);
+    if(!HashTable_add_entry(sa_table, h1, HKEY(state->ident.id), state)){
+        err = -ENOMEM;
+        goto exit;
+    }
+    entries++;
+    SAState_incref(state);
+    // Index by spi/protocol/addr if spi non-zero.
+    if(state->ident.spi){
+        h2 = sa_table_hash_spi(state->ident.spi, state->ident.protocol, state->ident.addr);
+        if(h1 != h2){
+            if(!HashTable_add_entry(sa_table, h2, HKEY(state->ident.id), state)){
+                err = -ENOMEM;
+                goto exit;
+            }
+            entries++;
+            SAState_incref(state);
+        }
+    }
+  exit:
+    if(err && entries){
+        // Undo the partial insertion.
+        sa_table_delete(state);
+    }
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+
+/** Find an SA by spi/protocol/addr.
+ * Increments the SA refcount on success.
+ *
+ * @param spi spi
+ * @param protocol protocol
+ * @param addr IP address
+ * @return SA or NULL
+ */
+SAState * sa_table_lookup_spi(u32 spi, u32 protocol, u32 addr){
+    SAState *state = NULL;
+    Hashcode h;
+    SAIdent id = {
+        .spi = spi,
+        .protocol = protocol,
+        .addr = addr };
+    TableArg arg = { .ptr = &id };
+    HTEntry *entry = NULL;
+
+    h = sa_table_hash_spi(spi, protocol, addr);
+    entry = HashTable_find_entry(sa_table, h, sa_table_spi_fn, arg);
+    if(entry){
+        state = entry->value;
+        SAState_incref(state);
+    }
+    return state;
+}
+
+/** Find an SA by unique id.
+ * Increments the SA refcount on success.
+ *
+ * @param id id
+ * @return SA or NULL
+ */
+SAState * sa_table_lookup_id(u32 id){
+    Hashcode h;
+    TableArg arg = { .ul = id };
+    HTEntry *entry = NULL;
+    SAState *state = NULL;
+
+    dprintf("> id=%u\n", id);
+    h = sa_table_hash_id(id);
+    entry = HashTable_find_entry(sa_table, h, sa_table_id_fn, arg);
+    if(entry){
+        state = entry->value;
+        SAState_incref(state);
+    }
+    dprintf("< state=%p\n", state);
+    return state;
+}
+
+/** Replace an existing SA by another in the table.
+ * The existing SA is not removed if the new one cannot be added.
+ * Only an SA whose keying state is SA_STATE_ACQUIRE may be replaced.
+ *
+ * @param existing SA to replace
+ * @param state new SA
+ * @return 0 on success, error code otherwise
+ */
+static int sa_table_replace(SAState *existing, SAState *state){
+    int err = 0;
+    // Need check for in-use?
+
+    dprintf(">\n");
+    if(existing->keying.state != SA_STATE_ACQUIRE){
+        err = -EINVAL;
+        goto exit;
+    }
+    // replace it.
+    err = sa_table_add(state);
+    if(err) goto exit;
+    sa_table_delete(existing);
+  exit:
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Allocate an SA.
+ * The SA is zeroed, its refcount is set to 1 and its lock is unlocked.
+ *
+ * @return SA or NULL
+ */
+SAState *SAState_alloc(void){
+    SAState *state;
+
+    dprintf(">\n");
+    state = kmalloc(sizeof(SAState), GFP_ATOMIC);
+    if(!state) goto exit;
+    *state = (SAState){};
+    atomic_set(&state->refcount, 1);
+    state->lock = SPIN_LOCK_UNLOCKED;
+  exit:
+    dprintf("< state=%p\n", state);
+    return state;
+}
+
+/** Create an SA in initial state.
+ * It has no spi and its keying state is acquire.
+ * It must have a unique id, protocol and address.
+ * At some point it should get updated with a complete SA.
+ * No SA type is bound to the new SA.
+ *
+ * @param ident SA identifier
+ * @param statep return parameter for new SA (set to NULL on failure)
+ * @return 0 on success, error code otherwise
+ */
+int SAState_init(SAIdent *ident, SAState **statep){
+    int err = 0;
+    SAState *state = NULL;
+
+    if(ident->spi || !ident->id){
+        err = -EINVAL;
+        goto exit;
+    }
+    state = SAState_alloc();
+    if (!state){
+        err = -ENOMEM;
+        goto exit;
+    }
+    state->ident = *ident;
+    state->keying.state = SA_STATE_ACQUIRE;
+  exit:
+    // Always hand the result back (NULL on failure), as SAState_create()
+    // does; otherwise the caller never sees the SA and it is leaked.
+    *statep = state;
+    return err;
+}
+
+/** Create a complete SA, with spi and cipher suite.
+ * The SA type is looked up from the protocol in the identifier and its
+ * init function is called. On success the keying state is valid.
+ *
+ * @param info SA parameters
+ * @param statep return parameter for new SA (set to NULL on failure)
+ * @return 0 on success, error code otherwise
+ */
+int SAState_create(SAInfo *info, SAState **statep){
+    int err = 0;
+    SAState *state = NULL;
+
+    dprintf(">\n");
+    state = SAState_alloc();
+    if (!state){
+        err = -ENOMEM;
+        goto exit;
+    }
+    state->ident = info->ident;
+    state->limits = info->limits;
+    state->digest = info->digest;
+    state->cipher = info->cipher;
+    state->compress = info->compress;
+    state->security = info->security;
+    err = SAType_get(state->ident.protocol, &state->type);
+    if (err) goto exit;
+    err = state->type->init(state, NULL);
+    if (err) goto exit;
+    state->keying.state = SA_STATE_VALID;
+  exit:
+    if(err && state){
+        if(state->type){
+            // Drops the only reference: calls the type's fini and frees.
+            SAState_decref(state);
+        } else {
+            // No type was bound (SAType_get failed). SAState_decref()
+            // calls state->type->fini(), so it must not be used here.
+            kfree(state);
+        }
+        state = NULL;
+    }
+    *statep = state;
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Create an SA for the given spi etc.
+ * For now we fix the cipher suite and the keys.
+ * Digest is SHA1 HMAC with a 128-bit key.
+ * Cipher is AES (Rijndael) in CBC mode with a 128-bit key.
+ *
+ * The cipher suite and keys should really come from policy, with the
+ * possibility of negotiating them with the peer (using IKE).
+ * Negotiation creates difficulties though - because the SA cannot
+ * be created immediately we have to be able to queue packets
+ * while the SA is being negotiated.
+ *
+ * @param security security flags (the cipher is only used with SA_CONF)
+ * @param spi spi (generated from protocol and addr if zero)
+ * @param protocol protocol
+ * @param addr address
+ * @param sa return parameter for SA
+ * @return 0 on success, error code otherwise
+ */
+int sa_create(int security, u32 spi, u32 protocol, u32 addr, SAState **sa){
+    char *digest_name = "sha1";
+    char *digest_key = "0123456789abcdef";
+    char *cipher_name= "aes";
+    char *cipher_key = "0123456789ABCDEF";
+    int digest_key_n = strlen(digest_key);
+    int cipher_key_n = strlen(cipher_key);
+    int confidential = (security & SA_CONF);
+    SAInfo info = {};
+    int err;
+
+    dprintf("> security=%d spi=%u protocol=%u addr=" IPFMT "\n",
+            security, spi, protocol, NIPQUAD(addr));
+    if(!spi){
+        spi = generate_spi(0, 0, protocol, addr);
+    }
+    dprintf("> info...\n");
+    // Identify the SA: fresh id plus the spi/protocol/addr triple.
+    info.ident.id = sa_id++;
+    info.ident.addr = addr;
+    info.ident.protocol = protocol;
+    info.ident.spi = spi;
+    info.security = security;
+
+    //sa_algorithm_probe_all();
+
+    // The digest is always configured.
+    dprintf("> digest name=%s key_n=%d\n", digest_name, digest_key_n);
+    memcpy(info.digest.key, digest_key, digest_key_n);
+    info.digest.bits = digest_key_n * 8;
+    strcpy(info.digest.name, digest_name);
+
+    // Without confidentiality fall back to the null cipher and no key.
+    if(!confidential){
+        dprintf("> cipher name=%s key_n=%d\n", "cipher_null", 0);
+        memset(info.cipher.key, 0, sizeof(info.cipher.key));
+        info.cipher.bits = 0;
+        strcpy(info.cipher.name, "cipher_null");
+    } else {
+        dprintf("> cipher name=%s key_n=%d\n", cipher_name, cipher_key_n);
+        memcpy(info.cipher.key, cipher_key, cipher_key_n);
+        info.cipher.bits = cipher_key_n * 8;
+        strcpy(info.cipher.name, cipher_name);
+    }
+
+    err = sa_set(&info, 0, sa);
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Create or update an SA.
+ * The SA is added to the table.
+ * Creating fails if an SA with the same id exists; updating fails if
+ * none does.
+ *
+ * @param info SA parameters
+ * @param update create if zero, update otherwise
+ * @param val return parameter for the SA (may be null, in which case
+ *        the reference from creation is dropped)
+ * @return 0 on success, error code otherwise
+ */
+int sa_set(SAInfo *info, int update, SAState **val){
+    SAState *created = NULL;
+    SAState *old = NULL;
+    int err = 0;
+
+    dprintf("> info=%p update=%d val=%p\n", info, update, val);
+    old = sa_table_lookup_id(info->ident.id);
+    if(update){
+        if(!old) err = -ENOENT;
+    } else {
+        if(old) err = -EINVAL;
+    }
+    if(!err){
+        err = SAState_create(info, &created);
+    }
+    if(!err){
+        err = (old
+               ? sa_table_replace(old, created)
+               : sa_table_add(created));
+    }
+    // Drop the reference taken by the lookup.
+    if(old) SAState_decref(old);
+    if(!err && val){
+        *val = created;
+    } else {
+        SAState_decref(created);
+    }
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Delete an SA by removing it from the SA table.
+ * It is an error if no SA with the given id exists.
+ *
+ * @param id SA id
+ * @return 0 on success, error code otherwise
+ */
+int sa_delete(int id){
+    SAState *found = sa_table_lookup_id(id);
+    if(!found){
+        return -ENOENT;
+    }
+    sa_table_delete(found);
+    // Drop the reference taken by the lookup.
+    SAState_decref(found);
+    return 0;
+}
diff --git a/tools/vnet/vnet-module/sa.h b/tools/vnet/vnet-module/sa.h
new file mode 100644
index 0000000000..5da76c0bfc
--- /dev/null
+++ b/tools/vnet/vnet-module/sa.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 2004 Mike Wray
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef __VNET_SA_H__ +#define __VNET_SA_H__ + +#include +#include + +#include + +#ifndef CRYPTO_MAX_KEY_BYTES +#define CRYPTO_MAX_KEY_BYTES 64 +#define CRYPTO_MAX_KEY_BITS (CRYPTO_MAX_KEY_BYTES * 8) +#endif + +typedef struct SALimits { + u64 bytes_soft; + u64 bytes_hard; + u64 packets_soft; + u64 packets_hard; +} SALimits; + +typedef struct SACounts { + u64 bytes; + u64 packets; + u32 integrity_failures; +} SACounts; + +typedef struct SAReplay { + int replay; + u32 send_seq; + u32 recv_seq; + u32 bitmap; + u32 replay_window; +} SAReplay; + +typedef struct SAKey { + char name[CRYPTO_MAX_ALG_NAME]; + int bits; + char key[CRYPTO_MAX_KEY_BYTES]; +} SAKey; + +typedef struct SAKeying { + u8 state; + u8 dying; +} SAKeying; + +typedef struct SAIdent { + u32 id; + u32 spi; + u32 addr; + u32 protocol; +} SAIdent; + +struct SAType; + +/** Security assocation (SA). */ +typedef struct SAState { + atomic_t refcount; + spinlock_t lock; + /** Identifier. */ + struct SAIdent ident; + /** Security flags. */ + int security; + /** Keying state. */ + struct SAKeying keying; + /** Byte counts etc. */ + struct SACounts counts; + /** Byte limits etc. */ + struct SALimits limits; + /** Replay protection. */ + struct SAReplay replay; + /** Digest algorithm. */ + struct SAKey digest; + /** Cipher algorithm. */ + struct SAKey cipher; + /** Compress algorith. */ + struct SAKey compress; + /** SA type (ESP, AH). */ + struct SAType *type; + /** Data for the SA type to use. 
*/ + void *data; +} SAState; + +typedef struct SAType { + char *name; + int protocol; + int (*init)(SAState *state, void *args); + void (*fini)(SAState *state); + int (*recv)(SAState *state, struct sk_buff *skb); + int (*send)(SAState *state, struct sk_buff *skb, Tunnel *tunnel); + u32 (*size)(SAState *state, int size); +} SAType; + +/** Information needed to create an SA. + * Unused algorithms have zero key size. + */ +typedef struct SAInfo { + /** Identifier. */ + SAIdent ident; + /** Security flags. */ + int security; + /** Digest algorithm and key. */ + SAKey digest; + /** Cipher algorithm and key. */ + SAKey cipher; + /** Compress algorithm and key. */ + SAKey compress; + /** SA lifetime limits. */ + SALimits limits; + /** Replay protection window. */ + int replay_window; +} SAInfo; + +enum sa_alg_type { + SA_ALG_DIGEST = 1, + SA_ALG_CIPHER = 2, + SA_ALG_COMPRESS = 3, +}; + +extern int SAType_add(SAType *type); +extern int SAType_del(SAType *type); +extern int SAType_get(int protocol, SAType **type); + +extern int sa_table_init(void); +extern void sa_table_exit(void); +extern int sa_table_delete(SAState *state); +extern int sa_table_add(SAState *state); +extern SAState * sa_table_lookup_spi(u32 spi, u32 protocol, u32 addr); +extern SAState * sa_table_lookup_id(u32 id); + +/** Increment reference count. + * + * @param sa security association (may be null) + */ +static inline void SAState_incref(SAState *sa){ + if(!sa) return; + atomic_inc(&sa->refcount); +} + +/** Decrement reference count, freeing if zero. 
+ * The SA type's fini function is called before freeing, if the SA has
+ * a type. An SA may have none: SAState_alloc() zeroes the SA and
+ * SAState_init() does not bind a type.
+ *
+ * @param sa security association (may be null)
+ */
+static inline void SAState_decref(SAState *sa){
+    if(!sa) return;
+    if(atomic_dec_and_test(&sa->refcount)){
+        if(sa->type && sa->type->fini){
+            sa->type->fini(sa);
+        }
+        kfree(sa);
+    }
+}
+
+extern SAState *SAState_alloc(void);
+extern int SAState_init(SAIdent *id, SAState **statep);
+extern int SAState_create(SAInfo *info, SAState **statep);
+
+/** Send an skb using the SA's type. The SA must have a type.
+ *
+ * @param sa security association
+ * @param skb packet
+ * @param tunnel tunnel passed through to the type's send function
+ * @return value returned by the type's send function
+ */
+static inline int SAState_send(SAState *sa, struct sk_buff *skb, Tunnel *tunnel){
+    return sa->type->send(sa, skb, tunnel);
+}
+
+/** Receive an skb using the SA's type. The SA must have a type.
+ *
+ * @param sa security association
+ * @param skb packet
+ * @return value returned by the type's recv function
+ */
+static inline int SAState_recv(SAState *sa, struct sk_buff *skb){
+    return sa->type->recv(sa, skb);
+}
+
+/** Ask the SA's type for a size. The SA must have a type.
+ *
+ * @param sa security association
+ * @param n size passed to the type's size function
+ * @return value returned by the type's size function
+ */
+static inline int SAState_size(SAState *sa, int n){
+    return sa->type->size(sa, n);
+}
+
+extern int sa_create(int security, u32 spi, u32 protocol, u32 addr, SAState **sa);
+extern int sa_set(SAInfo *info, int update, SAState **val);
+extern int sa_delete(int id);
+
+/** Security flags. */
+enum {
+    SA_AUTH = 1,
+    SA_CONF = 2
+};
+
+/** Keying states. */
+enum {
+    SA_STATE_ACQUIRE = 1,
+    SA_STATE_VALID = 2,
+};
+
+#endif /* !__VNET_SA_H__ */
diff --git a/tools/vnet/vnet-module/sa_algorithm.c b/tools/vnet/vnet-module/sa_algorithm.c
new file mode 100644
index 0000000000..d5d1418174
--- /dev/null
+++ b/tools/vnet/vnet-module/sa_algorithm.c
@@ -0,0 +1,367 @@
+/*
+ * Copyright (c) 2002 James Morris
+ * Copyright (C) 2004 Mike Wray
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include +//#include + +#include + +#define MODULE_NAME "IPSEC" +#define DEBUG 1 +#undef DEBUG +#include "debug.h" + +/** @file Tables of supported IPSEC algorithms. + * Has tables for digests, ciphers and compression algorithms. + */ + +/* + * Algorithms supported by IPsec. These entries contain properties which + * are used in key negotiation and sa processing, and are used to verify + * that instantiated crypto transforms have correct parameters for IPsec + * purposes. + */ + +/** Digests. */ +static SAAlgorithm digest_alg[] = { + { + .name = "digest_null", + .info = { + .digest = { + .icv_truncbits = 0, + .icv_fullbits = 0, + } + }, + .alg = { + .sadb_alg_id = SADB_X_AALG_NULL, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 0, + .sadb_alg_maxbits = 0 + } + }, + { + .name = "md5", + .info = { .digest = { + .icv_truncbits = 96, + .icv_fullbits = 128, + } }, + .alg = { + .sadb_alg_id = SADB_AALG_MD5HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 128 + } + }, + { + .name = "sha1", + .info = { + .digest = { + .icv_truncbits = 96, + .icv_fullbits = 160, + } + }, + .alg = { + .sadb_alg_id = SADB_AALG_SHA1HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 160, + .sadb_alg_maxbits = 160 + } + }, + { + .name = "sha256", + .info = { + .digest = { + .icv_truncbits = 128, + .icv_fullbits = 256, + } + }, + .alg = { + .sadb_alg_id = SADB_X_AALG_SHA2_256HMAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 256, + .sadb_alg_maxbits = 256 + } + }, +/* { */ +/* .name = "ripemd160", */ +/* .info = { */ +/* .digest = { */ +/* .icv_truncbits = 96, */ +/* .icv_fullbits = 160, */ +/* } */ +/* }, */ +/* .alg = { */ +/* .sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC, */ +/* .sadb_alg_ivlen = 0, */ +/* 
.sadb_alg_minbits = 160, */ +/* .sadb_alg_maxbits = 160 */ +/* } */ +/* }, */ + { /* Terminator */ } +}; + +/** Ciphers. */ +static SAAlgorithm cipher_alg[] = { + { + .name = "cipher_null", + .info = { + .cipher = { + .blockbits = 8, + .defkeybits = 0, + } + }, + .alg = { + .sadb_alg_id = SADB_EALG_NULL, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 0, + .sadb_alg_maxbits = 0 + } + }, + { + .name = "des", + .info = { + .cipher = { + .blockbits = 64, + .defkeybits = 64, + } + }, + .alg = { + .sadb_alg_id = SADB_EALG_DESCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 64, + .sadb_alg_maxbits = 64 + } + }, + { + .name = "des3_ede", + .info = { + .cipher = { + .blockbits = 64, + .defkeybits = 192, + } + }, + .alg = { + .sadb_alg_id = SADB_EALG_3DESCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 192, + .sadb_alg_maxbits = 192 + } + }, +/* { */ +/* .name = "cast128", */ //cast5? +/* .info = { */ +/* .cipher = { */ +/* .blockbits = 64, */ +/* .defkeybits = 128, */ +/* } */ +/* }, */ +/* .alg = { */ +/* .sadb_alg_id = SADB_X_EALG_CASTCBC, */ +/* .sadb_alg_ivlen = 8, */ +/* .sadb_alg_minbits = 40, */ +/* .sadb_alg_maxbits = 128 */ +/* } */ +/* }, */ + { + .name = "blowfish", + .info = { + .cipher = { + .blockbits = 64, + .defkeybits = 128, + } + }, + .alg = { + .sadb_alg_id = SADB_X_EALG_BLOWFISHCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 40, + .sadb_alg_maxbits = 448 + } + }, + { + .name = "aes", + .info = { + .cipher = { + .blockbits = 128, + .defkeybits = 128, + } + }, + .alg = { + .sadb_alg_id = SADB_X_EALG_AESCBC, + .sadb_alg_ivlen = 8, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 256 + } + }, + { /* Terminator */ } +}; + +/** Compressors. 
+ */
+static SAAlgorithm compress_alg[] = {
+    {
+        .name = "deflate",
+        .info = {
+            .compress = {
+                .threshold = 90,
+            }
+        },
+        .alg = { .sadb_alg_id = SADB_X_CALG_DEFLATE }
+    },
+/*     { */
+/*         .name = "lzs", */
+/*         .info = { */
+/*             .compress = { */
+/*                 .threshold = 90, */
+/*             } */
+/*         }, */
+/*         .alg = { .sadb_alg_id = SADB_X_CALG_LZS } */
+/*     }, */
+/*     { */
+/*         .name = "lzjh", */
+/*         .info = { */
+/*             .compress = { */
+/*                 .threshold = 50, */
+/*             } */
+/*         }, */
+/*         .alg = { .sadb_alg_id = SADB_X_CALG_LZJH } */
+/*     }, */
+    { /* Terminator */ }
+};
+
+/** Find an algorithm in a table by its sadb algorithm id.
+ * The table is scanned up to its terminator (the entry with a null name).
+ *
+ * @param algo table to search (may be null)
+ * @param alg_id sadb algorithm id
+ * @return the algorithm if found and marked available, NULL otherwise
+ */
+static SAAlgorithm *sa_algorithm_by_id(SAAlgorithm *algo, int alg_id) {
+    for( ; algo && algo->name; algo++){
+        if (algo->alg.sadb_alg_id == alg_id) {
+            return (algo->available ? algo : NULL);
+        }
+    }
+    return NULL;
+}
+
+
+/** Find an algorithm in a table by name.
+ * The table is scanned up to its terminator (the entry with a null name).
+ *
+ * @param algo table to search (may be null)
+ * @param name algorithm name (may be null)
+ * @return the algorithm if found and marked available, NULL otherwise
+ */
+static SAAlgorithm *sa_algorithm_by_name(SAAlgorithm *algo, char *name) {
+    if (!name) return NULL;
+    for( ; algo && algo->name; algo++){
+        if (strcmp(name, algo->name) == 0) {
+            return (algo->available ? algo : NULL);
+        }
+    }
+    return NULL;
+}
+
+SAAlgorithm *sa_digest_by_id(int alg_id) {
+    return sa_algorithm_by_id(digest_alg, alg_id);
+}
+
+SAAlgorithm *sa_cipher_by_id(int alg_id) {
+    return sa_algorithm_by_id(cipher_alg, alg_id);
+}
+
+SAAlgorithm *sa_compress_by_id(int alg_id) {
+    return sa_algorithm_by_id(compress_alg, alg_id);
+}
+
+SAAlgorithm *sa_digest_by_name(char *name) {
+    return sa_algorithm_by_name(digest_alg, name);
+}
+
+SAAlgorithm *sa_cipher_by_name(char *name) {
+    return sa_algorithm_by_name(cipher_alg, name);
+}
+
+SAAlgorithm *sa_compress_by_name(char *name) {
+    return sa_algorithm_by_name(compress_alg, name);
+}
+
+/** Get a digest table entry by position.
+ * The availability flag is not checked. The last valid index addresses
+ * the terminator entry, whose name is null.
+ *
+ * @param idx index into the digest table
+ * @return table entry, or NULL if idx is past the end of the table
+ */
+SAAlgorithm *sa_digest_by_index(unsigned int idx) {
+    if(idx >= sizeof(digest_alg) / sizeof(digest_alg[0])) return NULL;
+    return digest_alg + idx;
+}
+
+/** Get a cipher table entry by position.
+ * The availability flag is not checked. The last valid index addresses
+ * the terminator entry, whose name is null.
+ *
+ * @param idx index into the cipher table
+ * @return table entry, or NULL if idx is past the end of the table
+ */
+SAAlgorithm *sa_cipher_by_index(unsigned int idx) {
+    if(idx >= sizeof(cipher_alg) / sizeof(cipher_alg[0])) return NULL;
+    return cipher_alg + idx;
+}
+
+/** Get a compressor table entry by position.
+ * The availability flag is not checked. The last valid index addresses
+ * the terminator entry, whose name is null.
+ *
+ * @param idx index into the compressor table
+ * @return table entry, or NULL if idx is past the end of the table
+ */
+SAAlgorithm *sa_compress_by_index(unsigned int idx) {
+    if(idx >= sizeof(compress_alg) / sizeof(compress_alg[0])) return NULL;
+    return compress_alg + idx;
+}
+
+/** Refresh the available flag of every algorithm in a table from
+ * crypto_alg_available().
+ *
+ * @param algo table to probe (may be null)
+ */
+static void sa_algorithm_probe(SAAlgorithm *algo){
+    int status;
+    dprintf("> algo=%p\n", algo);
+    for( ; algo && algo->name; algo++){
+        dprintf("> algorithm %s...\n", algo->name);
+        status = crypto_alg_available(algo->name, 0);
+        dprintf("> algorithm %s status=%d\n",algo->name, status);
+        if (algo->available != status){
+            algo->available = status;
+        }
+    }
+    dprintf("<\n");
+}
+
+/** Crypto api is broken. When an unregistered algorithm is requested it
+ * tries to load a module of the same name. But not all algorithms are
+ * defined by modules of the same name.
+ */
+static char *crypto_modules[] = {
+    "aes",
+    //"arc4",
+    "blowfish",
+    //"cast5",
+    //"cast6",
+    "crypto_null",
+    "des",
+    //"md4",
+    "md5",
+    //"serpent",
+    "sha1",
+    "sha256",
+    //"sha512",
+    //"twofish",
+    NULL
+};
+
+// NOTE(review): the header named on this #include is assumed to be
+// <linux/kmod.h>, which declares request_module() - confirm.
+#include <linux/kmod.h>
+
+/** Request loading of each module in a null-terminated list of names.
+ * The results of request_module() are ignored.
+ *
+ * @param modules null-terminated array of module names
+ */
+static void sa_module_probe(char **modules){
+    char **p;
+    dprintf(">\n");
+    for(p = modules; *p; p++){
+        dprintf("> %s\n", *p);
+        request_module(*p);
+    }
+    dprintf("<\n");
+}
+
+/**
+ * Probe for the availability of crypto algorithms, and set the available
+ * flag for any algorithms found on the system. This is typically called by
+ * pfkey during userspace SA add, update or register.
+ */
+void sa_algorithm_probe_all(void){
+    dprintf("> \n");
+    //BUG_ON(in_softirq());
+    sa_module_probe(crypto_modules);
+    sa_algorithm_probe(digest_alg);
+    sa_algorithm_probe(cipher_alg);
+    sa_algorithm_probe(compress_alg);
+    dprintf("<\n");
+}
diff --git a/tools/vnet/vnet-module/sa_algorithm.h b/tools/vnet/vnet-module/sa_algorithm.h
new file mode 100644
index 0000000000..333481bcb7
--- /dev/null
+++ b/tools/vnet/vnet-module/sa_algorithm.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2004 Mike Wray
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef __VNET_SA_ALGORITHM_H__ +#define __VNET_SA_ALGORITHM_H__ + +#include +#include + +typedef struct SADigestInfo { + u16 icv_truncbits; + u16 icv_fullbits; +} SADigestInfo; + +typedef struct SACipherInfo { + u16 blockbits; + u16 defkeybits; +} SACipherInfo; + +typedef struct SACompressInfo { + u16 threshold; +} SACompressInfo; + +typedef struct SAAlgorithm { + char *name; + u8 available; + union { + SADigestInfo digest; + SACipherInfo cipher; + SACompressInfo compress; + } info; + struct sadb_alg alg; +} SAAlgorithm; + +extern SAAlgorithm *sa_digest_by_id(int alg_id); +extern SAAlgorithm *sa_cipher_by_id(int alg_id); +extern SAAlgorithm *sa_compress_by_id(int alg_id); +extern SAAlgorithm *sa_digest_by_name(char *name); +extern SAAlgorithm *sa_cipher_by_name(char *name); +extern SAAlgorithm *sa_compress_by_name(char *name); +extern SAAlgorithm *sa_digest_by_index(unsigned int idx); +extern SAAlgorithm *sa_cipher_by_index(unsigned int idx); +extern SAAlgorithm *sa_compress_by_index(unsigned int idx); +extern void sa_algorithm_probe_all(void); + +#define MAX_KEY_BITS 512 + +#endif /* ! 
__VNET_SA_ALGORITHM_H__ */ diff --git a/tools/vnet/vnet-module/skb_context.c b/tools/vnet/vnet-module/skb_context.c new file mode 100644 index 0000000000..5a76d7ed89 --- /dev/null +++ b/tools/vnet/vnet-module/skb_context.c @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include + +#include + +#define MODULE_NAME "VNET" +#define DEBUG 1 +#undef DEBUG +#include "debug.h" + +SkbContext *SkbContext_create(u32 vnet, u32 addr, int protocol, void *data, + void (*free_fn)(SkbContext *)){ + SkbContext *context = NULL; + + context = kmalloc(sizeof(SkbContext), GFP_ATOMIC); + if(!context) goto exit; + context->vnet = vnet; + context->addr = addr; + context->protocol = protocol; + context->data = data; + context->free_fn = free_fn; + context->next = NULL; + atomic_set(&context ->refcount, 1); + exit: + return context; +} + +void SkbContext_free(SkbContext *context){ + if(!context) return; + if(context->next) SkbContext_decref(context->next); + if(context->free_fn) context->free_fn(context); + context->vnet = 0; + context->addr = 0; + context->protocol = 0; + context->free_fn = NULL; + context->data = NULL; + context->next = NULL; + kfree(context); +} + +int SkbContext_push(SkbContext **val, u32 vnet, u32 addr, int 
protocol, + void *data, void (*free_fn)(SkbContext *)){ + int err = 0; + SkbContext *context = NULL; + + dprintf("> vnet=%u addr=%u.%u.%u.%u protocol=%d\n", + vnet, NIPQUAD(addr), protocol); + context = SkbContext_create(vnet, addr, protocol, data, free_fn); + if(!context){ + err = -ENOMEM; + goto exit; + } + context->next = *val; + *val = context; + exit: + dprintf("< err=%d\n", err); + return err; +} + +int skb_push_context(struct sk_buff *skb, u32 vnet, u32 addr, int protocol, + void *data, void (*free_fn)(SkbContext *)){ + int err = 0; + //SkbContext *ctxt = SKB_CONTEXT(skb); + dprintf("> skb=%p\n", skb); + + //err = SkbContext_push(&ctxt, vnet, addr, protocol, data, free_fn); //todo fixme + //SKB_CONTEXT(skb) = ctxt;//todo fixme + dprintf("< err=%d\n", err); + return err; +} + + diff --git a/tools/vnet/vnet-module/skb_context.h b/tools/vnet/vnet-module/skb_context.h new file mode 100644 index 0000000000..10cfac4c3b --- /dev/null +++ b/tools/vnet/vnet-module/skb_context.h @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __VNET_SKB_CONTEXT_H__ +#define __VNET_SKB_CONTEXT_H__ + +#include +#include +#include +#include + +/** Structure used to record inbound processing path for skbs. 
+ * For example, the ETHERIP protocol handler can use this to
+ * tell whether an inbound packet came through IPSEC ESP or not.
+ * Contexts are reference counted and can be chained through next.
+ */
+typedef struct SkbContext {
+    u32 vnet;
+    u32 addr;
+    int protocol;
+    /** Data owned by whoever created the context. */
+    void *data;
+    /** Optional hook called when the context is freed. */
+    void (*free_fn)(struct SkbContext *);
+    atomic_t refcount;
+    struct SkbContext *next;
+} SkbContext;
+
+/** Drop a reference to a context.
+ * The context is freed when the last reference goes.
+ *
+ * @param context context (may be null)
+ */
+static inline void SkbContext_decref(SkbContext *context){
+    extern void SkbContext_free(SkbContext *context);
+    if(context && atomic_dec_and_test(&context->refcount)){
+        SkbContext_free(context);
+    }
+}
+
+/** Take a reference to a context.
+ *
+ * @param context context (may be null)
+ */
+static inline void SkbContext_incref(SkbContext *context){
+    if(context){
+        atomic_inc(&context->refcount);
+    }
+}
+
+extern SkbContext *SkbContext_create(u32 vnet, u32 addr, int protocol, void *data,
+                                     void (*free_fn)(SkbContext *));
+
+extern int SkbContext_push(SkbContext **val, u32 vnet, u32 addr, int protocol,
+                           void *data, void (*free_fn)(SkbContext *));
+
+struct sk_buff;
+extern int skb_push_context(struct sk_buff *skb, u32 vnet, u32 addr, int protocol,
+                            void *data, void (*free_fn)(SkbContext *));
+
+//todo: fixme
+#define SKB_CONTEXT(_skb) ((SkbContext *)(&(_skb)->cb[0]))
+
+#endif /* !__VNET_SKB_CONTEXT_H__ */
diff --git a/tools/vnet/vnet-module/skb_util.c b/tools/vnet/vnet-module/skb_util.c
new file mode 100644
index 0000000000..c9742d6f51
--- /dev/null
+++ b/tools/vnet/vnet-module/skb_util.c
@@ -0,0 +1,515 @@
+/*
+ * Copyright (C) 2004 Mike Wray
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#define MODULE_NAME "VNET" +#define DEBUG 1 +#undef DEBUG +#include "debug.h" + +static const int DEBUG_SCATTERLIST = 0; +static const int DEBUG_SKB = 0; + +//============================================================================ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +#define SET_SCATTER_ADDR(sg, addr) do{} while(0) +#else +#define SET_SCATTER_ADDR(sg, addr) (sg).address = (addr) +#endif + +/** Make enough room in an skb for extra header and trailer. + * + * @param pskb return parameter for expanded skb + * @param skb skb + * @param head_n required headroom + * @param tail_n required tailroom + * @return 0 on success, error code otherwise + */ +int skb_make_room(struct sk_buff **pskb, struct sk_buff *skb, int head_n, int tail_n){ + int err = 0; + int has_headroom = (head_n <= skb_headroom(skb)); + int has_tailroom = (tail_n <= skb_tailroom(skb)); + int writeable = !skb_cloned(skb) && !skb_shared(skb); + + dprintf("> skb=%p headroom=%d head_n=%d tailroom=%d tail_n=%d\n", + skb, + skb_headroom(skb), head_n, + skb_tailroom(skb), tail_n); + if(writeable && has_headroom && has_tailroom){ + // There's room! Reuse it. + *pskb = skb; + } else if(writeable && has_tailroom){ + // Tailroom, no headroom. Expand header the way GRE does. 
+ struct sk_buff *new_skb = skb_realloc_headroom(skb, head_n + 16); + if(!new_skb){ + err = -ENOMEM; + goto exit; + } + dev_kfree_skb(skb); + *pskb = new_skb; + } else { + // No room. Expand. There may be more efficient ways to do + // this, but this is simple and correct. + struct sk_buff *new_skb = skb_copy_expand(skb, head_n + 16, tail_n, GFP_ATOMIC); + if(!new_skb){ + err = -ENOMEM; + goto exit; + } + dev_kfree_skb(skb); + *pskb = new_skb; + } + dprintf("> skb=%p headroom=%d head_n=%d tailroom=%d tail_n=%d\n", + *pskb, + skb_headroom(*pskb), head_n, + skb_tailroom(*pskb), tail_n); + exit: + dprintf("< err=%d\n", err); + return err; +} + +/** Copy some data bits from a kernel buffer to an skb. + * Derived in the obvious way from skb_copy_bits(). + */ +int skb_put_bits(const struct sk_buff *skb, int offset, void *src, int len) +{ + int i, copy; + int start = skb->len - skb->data_len; + + if (offset > (int)skb->len-len) + goto fault; + + /* Copy header. */ + if ((copy = start-offset) > 0) { + if (copy > len) + copy = len; + memcpy(skb->data + offset, src, copy); + if ((len -= copy) == 0) + return 0; + offset += copy; + src += copy; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; + + BUG_TRAP(start <= offset+len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end-offset) > 0) { + u8 *vaddr; + + if (copy > len) + copy = len; + + vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); + memcpy(vaddr + skb_shinfo(skb)->frags[i].page_offset + offset - start, + src, + copy); + kunmap_skb_frag(vaddr); + + if ((len -= copy) == 0) + return 0; + offset += copy; + src += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list; + + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { + int end; + + BUG_TRAP(start <= offset+len); + + end = start + list->len; + if ((copy = end-offset) > 0) { + if (copy > len) + copy = len; + if (skb_put_bits(list, offset-start, src, copy)) + goto fault; + if ((len 
-= copy) == 0) + return 0; + offset += copy; + src += copy; + } + start = end; + } + } + if (len == 0) + return 0; + + fault: + return -EFAULT; +} + +/** Add some space to the end of a (possibly fragmented) skb. + * + * Only works with Xen output skbs. Output skbs have 1 frag, and we + * add another frag for the extra space. + * + * @param skb skb + * @param n number of bytes to add + * @return 0 on success, error code otherwise + * + * @todo fixme + */ +int pskb_put(struct sk_buff *skb, int n){ + int err = 0; + if(1 || skb_is_nonlinear(skb)){ + struct skb_shared_info *info = skb_shinfo(skb); + char *ptr = NULL; + + if(info->nr_frags >= MAX_SKB_FRAGS){ + err = -ENOMEM; + goto exit; + } + ptr = kmalloc(n, GFP_ATOMIC); + if(!ptr){ + err = -ENOMEM; + goto exit; + } + info->nr_frags++; + info->frags[info->nr_frags - 1].page = virt_to_page(ptr); + info->frags[info->nr_frags - 1].page_offset = ((unsigned long)ptr & ~PAGE_MASK); + info->frags[info->nr_frags - 1].size = n; + + skb->data_len += n; + skb->len += n; + } else { + __skb_put(skb, n); + } + exit: + if(err) dprintf("< err=%d\n", err); + return err; +} + +/** Print some bits of an skb. + * + * @param skb to print + * @param offset byte offset to start printing at + * @param n number of bytes to print + */ +void skb_print_bits(struct sk_buff *skb, int offset, int n){ + int chunk = 16; + int i, k; + u8 buff[chunk]; + if(!DEBUG_SKB) return; + while(n){ + k = (n > chunk ? chunk : n); + skb_copy_bits(skb, offset, buff, k); + printk("%03d ", offset); + for(i=0; itail -= n; + skb->len -= n; + return skb->tail; +} + +// #define BUG_TRAP(x) +// if(!(x)){ printk("KERNEL: assertion (" #x ") failed at " __FILE__ "(%d)\n", __LINE__); } + +/** Convert a (possibly fragmented) skb into a scatter list. 
+ * + * @param skb skb to convert + * @param sg scatterlist to set up + * @param sg_n size of sg on input, number of elements set on output + * @param offset offset into data to start at + * @param len number of bytes + * @return 0 on success, error code otherwise + */ +int skb_scatterlist(struct sk_buff *skb, struct scatterlist *sg, int *sg_n, + int offset, int len){ + int err = 0; + int start; // No. of bytes copied so far (where next copy starts). + int size; // Size of the next chunk. + int end; // Where the next chunk ends (start + size). + int copy; // Number of bytes to copy in one operation. + int sg_i = 0; // Index into sg. + int i; + + if(DEBUG_SCATTERLIST){ + dprintf("> offset=%d len=%d (end=%d), skb len=%d,\n", + offset, len, offset+len, skb->len); + } + start = 0; + size = skb_headlen(skb); + end = start + size; + copy = end - offset; + if(copy > 0){ + char *p; + if(copy > len) copy = len; + if(sg_i >= *sg_n){ + err = -EINVAL; + goto exit; + } + p = skb->data + offset; + SET_SCATTER_ADDR(sg[sg_i], NULL); + sg[sg_i].page = virt_to_page(p); + sg[sg_i].offset = ((unsigned long)p & ~PAGE_MASK); + sg[sg_i].length = copy; + if(DEBUG_SCATTERLIST){ + dprintf("> sg_i=%d .page=%p .offset=%u .length=%d\n", + sg_i, sg[sg_i].page, sg[sg_i].offset, sg[sg_i].length); + } + sg_i++; + if((len -= copy) == 0) goto exit; + offset += copy; + } + start = end; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++){ + BUG_TRAP(start <= offset + len); + size = skb_shinfo(skb)->frags[i].size; + end = start + size; + copy = end - offset; + if(copy > 0){ + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + if(copy > len) copy = len; + if(sg_i >= *sg_n){ + err = -EINVAL; + goto exit; + } + SET_SCATTER_ADDR(sg[sg_i], NULL); + sg[sg_i].page = frag->page; + sg[sg_i].offset = frag->page_offset + offset - start; + sg[sg_i].length = copy; + if(DEBUG_SCATTERLIST){ + dprintf("> sg_i=%d .page=%p .offset=%u .length=%d\n", + sg_i, sg[sg_i].page, sg[sg_i].offset, sg[sg_i].length); + } + sg_i++; + 
if((len -= copy) == 0) goto exit; + offset += copy; + } + start = end; + } + exit: + if(!err) *sg_n = sg_i; + if(len) wprintf("> len=%d\n", len); + if(len) BUG(); + if(err) dprintf("< err=%d sg_n=%d\n", err, *sg_n); + return err; +} + +struct arpheader +{ + unsigned short ar_hrd; /* format of hardware address */ + unsigned short ar_pro; /* format of protocol address */ + unsigned char ar_hln; /* length of hardware address */ + unsigned char ar_pln; /* length of protocol address */ + unsigned short ar_op; /* ARP opcode (command) */ + +#if 1 + /* + * Ethernet looks like this : This bit is variable sized however... + */ + unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ + unsigned char ar_sip[4]; /* sender IP address */ + unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ + unsigned char ar_tip[4]; /* target IP address */ +#endif + +}; + +void print_skb_data(char *msg, int count, struct sk_buff *skb, u8 *data, int len) +{ + static int skb_count = 1000000; + u8 *ptr, *end; + u32 src_addr, dst_addr; + // Transport layer header. + union { + struct tcphdr *th; + struct udphdr *uh; + struct icmphdr *icmph; + struct igmphdr *igmph; + struct iphdr *ipiph; + unsigned char *raw; + } h; + // Network layer header. + union { + struct iphdr *iph; + struct ipv6hdr *ipv6h; + struct arpheader *arph; + struct ipxhdr *ipxh; + unsigned char *raw; + } nh; + // Link layer header. 
+ union { + struct ethhdr *ethernet; + unsigned char *raw; + } mac; + int protocol; + if(!count) count = ++skb_count; + if(!msg) msg = (char *)__FUNCTION__; + if(!data){ + printk("%s.%d> null data\n", msg, count); + return; + } + ptr = data; + end = data + len; + mac.raw = ptr; + ptr += sizeof(struct ethhdr); + if(ptr > end){ printk("***MAC:"); goto exit; } + protocol = ntohs(mac.ethernet->h_proto); + nh.raw = ptr; + + printk("%s.%d> type=%d protocol=0x%x\n", + msg, count, skb->pkt_type, htons(skb->protocol)); + if(1){ + printk("%s.%d> %p mac src=" MACFMT " dst=" MACFMT "\n", + msg, count, data, + MAC6TUPLE(mac.ethernet->h_source), + MAC6TUPLE(mac.ethernet->h_dest)); + } + + switch(protocol){ + case ETH_P_ARP: + ptr += sizeof(struct arpheader); + if(ptr > end){ printk("***ARP:"); goto exit; } + if(0){ + printk("%s.%d> ARP hrd=%d, pro=%d, hln=%d, pln=%d, op=%d\n", + msg, count, + nh.arph->ar_hrd, nh.arph->ar_pro, nh.arph->ar_hln, + nh.arph->ar_pln, nh.arph->ar_op); + } + memcpy(&src_addr, nh.arph->ar_sip, 4); + src_addr = ntohl(src_addr); + memcpy(&dst_addr, nh.arph->ar_tip, 4); + dst_addr = ntohl(dst_addr); + printk("%s.%d> ARP HW src=" MACFMT " dst=" MACFMT "\n", + msg, count, MAC6TUPLE(nh.arph->ar_sha), MAC6TUPLE(nh.arph->ar_tha)); + printk("%s.%d> ARP IP src=" IPFMT " dst=" IPFMT "\n", + msg, count, HIPQUAD(src_addr), HIPQUAD(dst_addr)); + break; + case ETH_P_IP: { + u16 src_port, dst_port; + if(ptr + sizeof(struct iphdr) > end){ printk("***IP:"); goto exit; } + src_addr = ntohl(nh.iph->saddr); + dst_addr = ntohl(nh.iph->daddr); + if(1){ + printk("%s.%d> IP proto=%d src=" IPFMT " dst=" IPFMT "\n", + msg, count, nh.iph->protocol, + HIPQUAD(src_addr), HIPQUAD(dst_addr)); + printk("%s.%d> IP tot_len=%u len=%d\n", + msg, count, nh.iph->tot_len & 0xffff, len - ETH_HLEN); + } + ptr += (nh.iph->ihl * 4); + if(ptr > end){ printk ("***IP: len"); goto exit; } + h.raw = ptr; + switch(nh.iph->protocol){ + case IPPROTO_TCP: + ptr += sizeof(struct tcphdr); + if(ptr > end){ 
printk("***TCP:"); goto exit; } + src_port = ntohs(h.th->source); + dst_port = ntohs(h.th->dest); + printk("%s.%d> TCP src=" IPFMT ":%u dst=" IPFMT ":%u\n", + msg, count, + HIPQUAD(src_addr), src_port, + HIPQUAD(dst_addr), dst_port); + break; + case IPPROTO_UDP: + ptr += sizeof(struct udphdr); + if(ptr > end){ printk("***UDP:"); goto exit; } + src_port = ntohs(h.uh->source); + dst_port = ntohs(h.uh->dest); + printk("%s.%d> UDP src=" IPFMT ":%u dst=" IPFMT ":%u\n", + msg, count, + HIPQUAD(src_addr), src_port, + HIPQUAD(dst_addr), dst_port); + break; + default: + printk("%s.%d> IP %d src=" IPFMT " dst=" IPFMT "\n", + msg, count, + nh.iph->protocol, HIPQUAD(src_addr), HIPQUAD(dst_addr)); + break; + } + break; } + case ETH_P_IPV6: + printk("%s.%d> IPv6\n", msg, count); + break; + case ETH_P_IPX: + printk("%s.%d> IPX\n", msg, count); + break; + default: + printk("%s.%d> protocol=%d\n", msg, count, protocol); + break; + } + return; + exit: + printk("%s.%d> %s: skb problem\n", msg, count, __FUNCTION__); + printk("%s.%d> %s: data=%p end=%p(%d) ptr=%p(%d) eth=%d arp=%d ip=%d\n", + msg, count, __FUNCTION__, + data, end, end - data, ptr, ptr - data, + sizeof(struct ethhdr), sizeof(struct arphdr), sizeof(struct iphdr)); + return; +} + diff --git a/tools/vnet/vnet-module/skb_util.h b/tools/vnet/vnet-module/skb_util.h new file mode 100644 index 0000000000..d3e9a1e6d9 --- /dev/null +++ b/tools/vnet/vnet-module/skb_util.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _VNET_SKB_UTIL_H_ +#define _VNET_SKB_UTIL_H_ + +struct sk_buff; +struct scatterlist; + +extern int skb_make_room(struct sk_buff **pskb, struct sk_buff *skb, int head_n, int tail_n); + +extern int skb_put_bits(const struct sk_buff *skb, int offset, void *src, int len); + +extern int pskb_put(struct sk_buff *skb, int n); + +extern void skb_print_bits(struct sk_buff *skb, int offset, int n); + +extern void buf_print(char *buf, int n); + +extern void *skb_trim_tail(struct sk_buff *skb, int n); + +extern int skb_scatterlist(struct sk_buff *skb, struct scatterlist *sg, + int *sg_n, int offset, int len); + +extern void print_skb_data(char *msg, int count, struct sk_buff *skb, u8 *data, int len); + + +#endif diff --git a/tools/vnet/vnet-module/tunnel.c b/tools/vnet/vnet-module/tunnel.c new file mode 100644 index 0000000000..2ea261bf6f --- /dev/null +++ b/tools/vnet/vnet-module/tunnel.c @@ -0,0 +1,228 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free software Foundation, Inc.,
+ * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include "hash_table.h"
+
+#define MODULE_NAME "VNET"
+//#define DEBUG 1
+#undef DEBUG
+#include "debug.h"
+
+/** Print a tunnel followed by the chain of tunnels it is layered on.
+ *
+ * A null tunnel is printed as a placeholder entry of type "ip".
+ *
+ * @param tunnel tunnel (may be null)
+ */
+void Tunnel_print(Tunnel *tunnel){
+    if(tunnel){
+        printk("Tunnel<%p base=%p ref=%02d type=%s>\n",
+               tunnel,
+               tunnel->base,
+               atomic_read(&tunnel->refcount),
+               tunnel->type->name);
+        if(tunnel->base){
+            Tunnel_print(tunnel->base);
+        }
+    } else {
+        printk("Tunnel<%p base=%p ref=%02d type=%s>\n",
+               NULL, NULL, 0, "ip");
+    }
+}
+
+/** Create a tunnel and call its type's open handler.
+ *
+ * The new tunnel starts with a reference count of 1, which belongs to the
+ * caller, and takes its own reference on base. If the open handler fails
+ * the tunnel is released again with Tunnel_decref() and val is set to NULL.
+ *
+ * @param type tunnel type (must be non-null and have open, send and close handlers)
+ * @param vnet vnet id
+ * @param addr tunnel address
+ * @param base underlying tunnel (may be null)
+ * @param val return parameter for the tunnel (NULL on error)
+ * @return 0 on success, error code otherwise
+ */
+int Tunnel_create(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **val){
+    int err = 0;
+    Tunnel *tunnel = NULL;
+    // Validate the type first: the trace below dereferences it.
+    if(!type || !type->open || !type->send || !type->close){
+        err = -EINVAL;
+        goto exit;
+    }
+    dprintf("> type=%s vnet=%d addr=" IPFMT " base=%s\n",
+            type->name, vnet, NIPQUAD(addr), (base ? base->type->name : "ip"));
+    tunnel = kmalloc(sizeof(Tunnel), GFP_ATOMIC);
+    if(!tunnel){
+        err = -ENOMEM;
+        goto exit;
+    }
+    atomic_set(&tunnel->refcount, 1);
+    tunnel->key.vnet = vnet;
+    tunnel->key.addr = addr;
+    tunnel->type = type;
+    tunnel->data = NULL;
+    tunnel->send_stats = (TunnelStats){};
+    Tunnel_incref(base);
+    tunnel->base = base;
+    err = type->open(tunnel);
+  exit:
+    if(err && tunnel){
+        // Dropping the only reference closes and frees the half-opened tunnel.
+        Tunnel_decref(tunnel);
+        tunnel = NULL;
+    }
+    *val = tunnel;
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Create a tunnel and add it to the tunnel table.
+ *
+ * @param type tunnel type
+ * @param vnet vnet id
+ * @param addr tunnel address
+ * @param base underlying tunnel (may be null)
+ * @param tunnel return parameter for the tunnel (NULL on error)
+ * @return 0 on success, error code otherwise
+ */
+int Tunnel_open(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **tunnel){
+    int err = 0;
+
+    dprintf(">\n");
+    err = Tunnel_create(type, vnet, addr, base, tunnel);
+    if(err) goto exit;
+    err = Tunnel_add(*tunnel);
+  exit:
+    if(err){
+        // *tunnel is NULL if the create failed; Tunnel_decref() accepts that.
+        Tunnel_decref(*tunnel);
+        *tunnel = NULL;
+    }
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Account for one packet in the tunnel statistics.
+ *
+ * @param stats statistics to update
+ * @param len packet length in bytes
+ * @param err send result: nonzero counts the packet as dropped
+ */
+void TunnelStats_update(TunnelStats *stats, int len, int err){
+    dprintf(">len=%d err=%d\n", len, err);
+    if(err){
+        stats->dropped_bytes += len;
+        stats->dropped_packets++;
+    } else {
+        stats->bytes += len;
+        stats->packets++;
+    }
+    dprintf("<\n");
+}
+
+/** Table of tunnels, indexed by vnet and addr. */
+HashTable *tunnel_table = NULL;
+
+/** Hash a tunnel key (vnet and addr) for the tunnel table.
+ *
+ * @param k pointer to a TunnelKey
+ * @return hashcode
+ */
+static inline Hashcode tunnel_table_key_hash_fn(void *k){
+    TunnelKey *key = k;
+    Hashcode h = 0;
+    h = hash_2ul(key->vnet, key->addr);
+    return h;
+}
+
+/** Compare two tunnel keys.
+ *
+ * @param k1 pointer to a TunnelKey
+ * @param k2 pointer to a TunnelKey
+ * @return nonzero if vnet and addr are both equal
+ */
+static int tunnel_table_key_equal_fn(void *k1, void *k2){
+    TunnelKey *key1 = k1;
+    TunnelKey *key2 = k2;
+    return (key1->vnet == key2->vnet)
+        && (key1->addr == key2->addr);
+}
+
+/** Free an entry in the tunnel table, dropping the table's tunnel reference.
+ *
+ * @param table tunnel table
+ * @param entry entry to free (may be null)
+ */
+static void tunnel_table_entry_free_fn(HashTable *table, HTEntry *entry){
+    Tunnel *tunnel;
+    if(!entry) return;
+    tunnel = entry->value;
+    //dprintf(">\n"); Tunnel_print(tunnel);
+    Tunnel_decref(tunnel);
+    HTEntry_free(entry);
+}
+
+/** Create the tunnel table and install its key and entry handlers.
+ *
+ * @return 0 on success, -ENOMEM if the table could not be allocated
+ */
+int Tunnel_init(void){
+    int err = 0;
+    dprintf(">\n");
+    tunnel_table = HashTable_new(0);
+    if(!tunnel_table){
+        err = -ENOMEM;
+        goto exit;
+    }
+    tunnel_table->entry_free_fn = tunnel_table_entry_free_fn;
+    tunnel_table->key_hash_fn = tunnel_table_key_hash_fn;
+    tunnel_table->key_equal_fn = tunnel_table_key_equal_fn;
+  exit:
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Lookup tunnel state by vnet and destination.
+ * A reference is taken on the tunnel returned; the caller
+ * must release it with Tunnel_decref().
+ *
+ * @param vnet vnet
+ * @param addr destination address
+ * @return tunnel state or NULL
+ */
+Tunnel * Tunnel_lookup(u32 vnet, u32 addr){
+    Tunnel *tunnel = NULL;
+    TunnelKey key = {.vnet = vnet, .addr = addr };
+    dprintf(">\n");
+    tunnel = HashTable_get(tunnel_table, &key);
+    Tunnel_incref(tunnel);
+    dprintf("< tunnel=%p\n", tunnel);
+    return tunnel;
+}
+
+/** Add a tunnel to the tunnel table.
+ * The tunnel is passed as both key and value: the hash and equality
+ * handlers read the TunnelKey at the start of the tunnel.
+ * On success the table holds its own reference to the tunnel.
+ *
+ * @param tunnel tunnel to add
+ * @return 0 on success, -ENOMEM if the table add failed
+ */
+int Tunnel_add(Tunnel *tunnel){
+    int err = 0;
+    dprintf(">\n");
+    if(HashTable_add(tunnel_table, tunnel, tunnel)){
+        Tunnel_incref(tunnel);
+    } else {
+        err = -ENOMEM;
+    }
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Remove a tunnel from the tunnel table.
+ *
+ * @param tunnel tunnel to remove
+ * @return the result of HashTable_remove()
+ */
+int Tunnel_del(Tunnel *tunnel){
+    return HashTable_remove(tunnel_table, tunnel);
+}
+
+/** Do tunnel send processing on a packet.
+ * + * @param tunnel tunnel state + * @param skb packet + * @return 0 on success, error code otherwise + */ +int Tunnel_send(Tunnel *tunnel, struct sk_buff *skb){ + int err = 0; + int len; + dprintf("> tunnel=%p skb=%p\n", tunnel, skb); + len = skb->len; + if(tunnel){ + dprintf("> type=%s type->send...\n", tunnel->type->name); + err = tunnel->type->send(tunnel, skb); + // Must not refer to skb after sending - might have been freed. + TunnelStats_update(&tunnel->send_stats, len, err); + } else { + struct net_device *dev = NULL; + err = vnet_get_device(DEVICE, &dev); + if(err) goto exit; + skb->dev = dev; + err = skb_xmit(skb); + dev_put(dev); + } + exit: + dprintf("< err=%d\n", err); + return err; +} + +int __init tunnel_module_init(void){ + return Tunnel_init(); +} + +void __exit tunnel_module_exit(void){ +} diff --git a/tools/vnet/vnet-module/tunnel.h b/tools/vnet/vnet-module/tunnel.h new file mode 100644 index 0000000000..e2241e82d4 --- /dev/null +++ b/tools/vnet/vnet-module/tunnel.h @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef __VNET_TUNNEL_H__ +#define __VNET_TUNNEL_H__ + +#include +#include +#include + +struct sk_buff; +struct Tunnel; + +typedef struct TunnelType { + const char *name; + int (*open)(struct Tunnel *tunnel); + int (*send)(struct Tunnel *tunnel, struct sk_buff *skb); + void (*close)(struct Tunnel *tunnel); +} TunnelType; + +typedef struct TunnelStats { + int bytes; + int packets; + int dropped_bytes; + int dropped_packets; +} TunnelStats; + +typedef struct TunnelKey { + u32 vnet; + u32 addr; +} TunnelKey; + +typedef struct Tunnel { + /** Key identifying the tunnel. Must be first. */ + struct TunnelKey key; + /** Reference count. */ + atomic_t refcount; + /** Tunnel type. */ + struct TunnelType *type; + /** Statistics. */ + struct TunnelStats send_stats; + /** Type-dependent state. */ + void *data; + /** Underlying tunnel (may be null). */ + struct Tunnel *base; +} Tunnel; + +extern void Tunnel_print(Tunnel *tunnel); + +/** Decrement the reference count, freeing if zero. + * + * @param tunnel tunnel (may be null) + */ +static inline void Tunnel_decref(Tunnel *tunnel){ + if(!tunnel) return; + if(atomic_dec_and_test(&tunnel->refcount)){ + printk("%s> Closing tunnel:\n", __FUNCTION__); + Tunnel_print(tunnel); + tunnel->type->close(tunnel); + Tunnel_decref(tunnel->base); + kfree(tunnel); + } +} + +/** Increment the reference count. 
+ * + * @param tunnel tunnel (may be null) + */ +static inline void Tunnel_incref(Tunnel *tunnel){ + if(!tunnel) return; + atomic_inc(&tunnel->refcount); +} + +extern int Tunnel_init(void); +extern Tunnel * Tunnel_lookup(u32 vnet, u32 addr); +extern int Tunnel_add(Tunnel *tunnel); +extern int Tunnel_del(Tunnel *tunnel); +extern int Tunnel_send(Tunnel *tunnel, struct sk_buff *skb); + +extern int Tunnel_create(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **tunnelp); +extern int Tunnel_open(TunnelType *type, u32 vnet, u32 addr, Tunnel *base, Tunnel **tunnelp); + +extern int tunnel_module_init(void); +extern void tunnel_module_exit(void); + +#endif /* !__VNET_TUNNEL_H__ */ diff --git a/tools/vnet/vnet-module/varp.c b/tools/vnet/vnet-module/varp.c new file mode 100644 index 0000000000..3b1507e045 --- /dev/null +++ b/tools/vnet/vnet-module/varp.c @@ -0,0 +1,1236 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "allocate.h" +#include "hash_table.h" +#include "sys_net.h" +#include "sys_string.h" + +#define MODULE_NAME "VARP" +//#define DEBUG 1 +#undef DEBUG +#include "debug.h" + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +// The 'ethernet' field in the skb->mac union went away. +#define MAC_ETH(_skb) ((struct ethhdr *)(_skb)->mac.raw) +#else +#define MAC_ETH(_skb) ((_skb)->mac.ethernet) +#endif + +/** @file VARP: Virtual ARP. + * + * Handles virtual ARP requests for vnet/vmac. + */ + +/* + +Varp uses UDP on port 1798. + +on domain up: ? + send varp.announce { id, vmac, vnet, coa } for each vif + that haven't announced before, or has changed. + install vif entries in local table. + +on varp.announce{ id, vmac, vnet, coa }: + update VARP entry for vmac x vnet if have one, reset ttl. + +on varp.request { id, vmac, vnet }: + if have a vif for the requested vmac/vnet, + reply with varp.announce{ id, vmac, vnet, coa } + +on timer: + traverse VARP table, flush old entries. + +on probe timer: + probe again if not out of tries. + if out of tries invalidate entry. + +*/ + +/** Time-to-live of varp entries (in jiffies).*/ +#define VARP_ENTRY_TTL (60*HZ) + +/** Maximum number of varp probes to make. */ +#define VARP_PROBE_MAX 5 + +/** Interval between varp probes (in jiffies). */ +#define VARP_PROBE_INTERVAL (3*HZ) + +/** Maximum number of queued skbs for a varp entry. */ +#define VARP_QUEUE_MAX 16 + +/** Number of buckets in the varp table (must be prime). */ +#define VARP_TABLE_BUCKETS 3001 + +/** Varp entry states. 
*/ +enum { + VARP_STATE_INCOMPLETE = 1, + VARP_STATE_REACHABLE = 2, + VARP_STATE_FAILED = 3 +}; + +/** Varp entry flags. */ +enum { + VARP_FLAG_PROBING = 1, + VARP_FLAG_PERMANENT = 2, +}; + +/** Key for varp entries. */ +typedef struct VarpKey { + /** Vnet id (host order). */ + u32 vnet; + /** Virtual MAC address. */ + Vmac vmac; +} VarpKey; + +/** An entry in the varp cache. */ +typedef struct VarpEntry { + /** Key for the entry. */ + VarpKey key; + /** Care-of address for the key. */ + u32 addr; + /** Last-updated timestamp. */ + unsigned long timestamp; + /** State. */ + short state; + /** Flags. */ + short flags; + /** Reference count. */ + atomic_t refcount; + /** Lock. */ + rwlock_t lock; + /** How many probes have been made. */ + atomic_t probes; + /** Probe timer. */ + struct timer_list timer; + void (*error)(struct VarpEntry *ventry, struct sk_buff *skb); + /** Outbound skb queue. */ + struct sk_buff_head queue; + /** Maximum size of the queue. */ + int queue_max; + + int locks; +} VarpEntry; + +/** The varp cache. Varp entries indexed by VarpKey. */ +typedef struct VarpTable { + + HashTable *table; + + /** Sweep timer. */ + struct timer_list timer; + + /** Lock. Need to use a semaphore instead of a spinlock because + * some operations under the varp table lock can schedule - and + * you mustn't hold a spinlock when scheduling. + */ + struct semaphore lock; + +} VarpTable; + +/** The varp cache. */ +static VarpTable *varp_table = NULL; + +/** Module parameter for the multicast address. */ +static char *varp_mcaddr = NULL; + +/** Multicast address (network order). */ +u32 varp_mcast_addr = 0; + +/** Unicast address (network order). */ +u32 varp_ucast_addr = 0; + +/** UDP port (network order). */ +u16 varp_port = 0; + +/** Network device to use. 
*/ +char *varp_device = DEVICE; + +#define VarpTable_read_lock(z, flags) do{ (flags) = 0; down(&(z)->lock); } while(0) +#define VarpTable_read_unlock(z, flags) do{ (flags) = 0; up(&(z)->lock); } while(0) +#define VarpTable_write_lock(z, flags) do{ (flags) = 0; down(&(z)->lock); } while(0) +#define VarpTable_write_unlock(z, flags) do{ (flags) = 0; up(&(z)->lock); } while(0) + +#define VarpEntry_lock(ventry, flags) write_lock_irqsave(&(ventry)->lock, (flags)) +#define VarpEntry_unlock(ventry, flags) write_unlock_irqrestore(&(ventry)->lock, (flags)) + +void VarpTable_sweep(VarpTable *z, int all); +void VarpTable_print(VarpTable *z); + +/** Print the varp cache (if debug on). + */ +void varp_dprint(void){ +#ifdef DEBUG + VarpTable_print(varp_table); +#endif +} + +/** Print varp info and the varp cache. + */ +void varp_print(void){ + printk(KERN_INFO "=== VARP ===============================================================\n"); + printk(KERN_INFO "varp_device %s\n", varp_device); + printk(KERN_INFO "varp_mcast_addr " IPFMT "\n", NIPQUAD(varp_mcast_addr)); + printk(KERN_INFO "varp_ucast_addr " IPFMT "\n", NIPQUAD(varp_ucast_addr)); + printk(KERN_INFO "varp_port %d\n", ntohs(varp_port)); + VarpTable_print(varp_table); + printk(KERN_INFO "========================================================================\n"); +} + +/** Lookup a network device by name. + * + * @param name device name + * @param dev return parameter for the device + * @return 0 on success, error code otherwise + */ +int vnet_get_device(const char *name, struct net_device **dev){ + int err = 0; + *dev = dev_get_by_name(name); + if(!*dev){ + err = -ENETDOWN; + } + return err; +} + +/** Get the source address from a device. 
+ *
+ * @param dev device
+ * @param addr return parameter for address
+ * @return 0 on success, error code otherwise
+ *         (-EIO if the device has no in_device,
+ *          -EADDRNOTAVAIL if it has no address configured)
+ */
+int vnet_get_device_address(struct net_device *dev, u32 *addr){
+    int err = 0;
+    struct in_device *in_dev;
+
+    //printk("%s>\n", __FUNCTION__);
+    in_dev = in_dev_get(dev);
+    if(!in_dev){
+        err = -EIO;
+        goto exit;
+    }
+    // The address list is empty when no address is configured on the device.
+    if(in_dev->ifa_list){
+        *addr = in_dev->ifa_list->ifa_address;
+    } else {
+        err = -EADDRNOTAVAIL;
+    }
+    in_dev_put(in_dev);
+  exit:
+    //printk("%s< err=%d\n", __FUNCTION__, err);
+    return err;
+}
+
+#ifndef LL_RESERVED_SPACE
+#define HH_DATA_MOD 16
+#define LL_RESERVED_SPACE(dev) \
+    ((((dev)->hard_header_len) & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
+#endif
+
+/** Send a varp protocol message.
+ *
+ * Builds a UDP/IP packet carrying a VarpHdr and transmits it with skb_xmit().
+ * When replying to a packet (skbin non-null) the message goes back to the
+ * sender's MAC and IP address. Otherwise it goes to the varp multicast
+ * address, or to the device broadcast address if varp_mcast_addr is not
+ * a multicast address.
+ *
+ * @param opcode varp opcode (host order)
+ * @param dev device (may be null, in which case varp_device is looked up)
+ * @param skbin skb being replied to (may be null)
+ * @param vnet vnet id (in host order)
+ * @param vmac vmac (in network order)
+ * @return 0 on success, error code otherwise
+ */
+int varp_send(u16 opcode, struct net_device *dev, struct sk_buff *skbin,
+              u32 vnet, Vmac *vmac){
+    int err = 0;
+    int link_n = 0;
+    int ip_n = sizeof(struct iphdr);
+    int udp_n = sizeof(struct udphdr);
+    int varp_n = sizeof(VarpHdr);
+    // Nonzero if we looked the device up ourselves and must release it.
+    int dev_held = 0;
+    struct sk_buff *skbout = NULL;
+    struct in_device *in_dev = NULL;
+    VarpHdr *varph = NULL;
+    u8 macbuf[6] = {};
+    u8 *smac, *dmac;
+    u32 saddr, daddr;
+    u16 sport, dport;
+
+    dmac = macbuf;
+    dprintf("> opcode=%d vnet=%d vmac=" MACFMT "\n",
+            opcode, vnet, MAC6TUPLE(vmac->mac));
+    if(!dev){
+        //todo: should use routing for daddr to get device.
+        err = vnet_get_device(varp_device, &dev);
+        if(err) goto exit;
+        dev_held = 1;
+    }
+    link_n = LL_RESERVED_SPACE(dev);
+    in_dev = in_dev_get(dev);
+    if(!in_dev){
+        // Nothing can be sent: report it instead of returning success.
+        err = -EIO;
+        goto exit;
+    }
+    if(!in_dev->ifa_list){
+        // No address configured on the device, so no source address.
+        in_dev_put(in_dev);
+        err = -EADDRNOTAVAIL;
+        goto exit;
+    }
+
+    smac = dev->dev_addr;
+    saddr = in_dev->ifa_list->ifa_address;
+
+    if(skbin){
+        dmac = MAC_ETH(skbin)->h_source;
+        sport = skbin->h.uh->dest;
+        daddr = skbin->nh.iph->saddr;
+        //dport = skbin->h.uh->source;
+        dport = varp_port;
+    } else {
+        if(MULTICAST(varp_mcast_addr)){
+            daddr = varp_mcast_addr;
+            ip_eth_mc_map(daddr, dmac);
+        } else {
+            daddr = in_dev->ifa_list->ifa_broadcast;
+            dmac = dev->broadcast;
+        }
+        sport = varp_port;
+        dport = varp_port;
+    }
+    in_dev_put(in_dev);
+
+    dprintf("> smac=" MACFMT " dmac=" MACFMT "\n", MAC6TUPLE(smac), MAC6TUPLE(dmac));
+    dprintf("> saddr=" IPFMT " daddr=" IPFMT "\n", NIPQUAD(saddr), NIPQUAD(daddr));
+    dprintf("> sport=%u dport=%u\n", ntohs(sport), ntohs(dport));
+
+    skbout = alloc_skb(link_n + ip_n + udp_n + varp_n, GFP_ATOMIC);
+    if (!skbout){
+        err = -ENOMEM;
+        goto exit;
+    }
+    skbout->dev = dev;
+    skb_reserve(skbout, link_n);
+    skbout->protocol = htons(ETH_P_IP);
+
+    // Device header. Pushes device header on front of skb.
+    if (dev->hard_header){
+        err = dev->hard_header(skbout, dev, ETH_P_IP, dmac, smac, skbout->len);
+        if(err < 0) goto exit;
+        skbout->mac.raw = skbout->data;
+    }
+
+    // IP header.
+    // NOTE(review): check is left 0 here; presumably skb_xmit() fills it in - confirm.
+    skbout->nh.raw = skb_put(skbout, ip_n);
+    skbout->nh.iph->version = 4;
+    skbout->nh.iph->ihl = ip_n / 4;
+    skbout->nh.iph->tos = 0;
+    skbout->nh.iph->tot_len = htons(ip_n + udp_n + varp_n);
+    skbout->nh.iph->id = 0;
+    skbout->nh.iph->frag_off = 0;
+    skbout->nh.iph->ttl = 64;
+    skbout->nh.iph->protocol = IPPROTO_UDP;
+    skbout->nh.iph->saddr = saddr;
+    skbout->nh.iph->daddr = daddr;
+    skbout->nh.iph->check = 0;
+
+    // UDP header.
+    skbout->h.raw = skb_put(skbout, udp_n);
+    skbout->h.uh->source = sport;
+    skbout->h.uh->dest = dport;
+    skbout->h.uh->len = htons(udp_n + varp_n);
+    skbout->h.uh->check = 0;
+
+    // Varp header.
+    varph = (void*)skb_put(skbout, varp_n);
+    *varph = (VarpHdr){};
+    varph->id = htons(VARP_ID);
+    varph->opcode = htons(opcode);
+    varph->vnet = htonl(vnet);
+    varph->vmac = *vmac;
+    varph->addr = saddr;
+
+    err = skb_xmit(skbout);
+
+  exit:
+    // NOTE(review): assumes skb_xmit() does not free the skb when it fails - confirm.
+    if(err && skbout) kfree_skb(skbout);
+    // Release the device reference taken by vnet_get_device() above.
+    if(dev_held) dev_put(dev);
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Send a varp request for the vnet and destination mac of a packet.
+ *
+ * @param skb packet
+ * @param vnet vnet (in host order)
+ * @return 0 on success, error code otherwise
+ */
+int varp_solicit(struct sk_buff *skb, int vnet){
+    int err = 0;
+    dprintf("> skb=%p\n", skb);
+    varp_dprint();
+    err = varp_send(VARP_OP_REQUEST, NULL, NULL,
+                    vnet, (Vmac*)MAC_ETH(skb)->h_dest);
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Test some flags.
+ *
+ * @param z varp entry
+ * @param flags to test
+ * @return nonzero if flags set
+ */
+int VarpEntry_get_flags(VarpEntry *z, int flags){
+    return z->flags & flags;
+}
+
+/** Set some flags.
+ *
+ * @param z varp entry
+ * @param flags to set
+ * @param set set flags on if nonzero, off if zero
+ * @return new flags value
+ */
+int VarpEntry_set_flags(VarpEntry *z, int flags, int set){
+    if(set){
+        z->flags |= flags;
+    } else {
+        z->flags &= ~flags;
+    }
+    return z->flags;
+}
+
+/** Print a varp entry.
+ *
+ * @param ventry varp entry
+ */
+void VarpEntry_print(VarpEntry *ventry){
+    if(ventry){
+        char *c, *d;
+        switch(ventry->state){
+        case VARP_STATE_INCOMPLETE: c = "INC"; break;
+        case VARP_STATE_REACHABLE: c = "RCH"; break;
+        case VARP_STATE_FAILED: c = "FLD"; break;
+        default: c = "UNK"; break;
+        }
+        d = (VarpEntry_get_flags(ventry, VARP_FLAG_PROBING) ? "P" : " ");
+
+        printk(KERN_INFO "VENTRY(%p ref=%1d %s %s vnet=%d vmac=" MACFMT " addr=" IPFMT " q=%d t=%lu)\n",
+               ventry,
+               atomic_read(&ventry->refcount),
+               c, d,
+               ventry->key.vnet,
+               MAC6TUPLE(ventry->key.vmac.mac),
+               NIPQUAD(ventry->addr),
+               skb_queue_len(&ventry->queue),
+               ventry->timestamp);
+    } else {
+        printk("VENTRY: Null!\n");
+    }
+}
+
+/** Free a varp entry.
+ *
+ * @param z varp entry
+ */
+void VarpEntry_free(VarpEntry *z){
+    if(!z) return;
+    deallocate(z);
+}
+
+/** Increment reference count.
+ *
+ * @param z varp entry (may be null)
+ */
+void VarpEntry_incref(VarpEntry *z){
+    if(!z) return;
+    atomic_inc(&z->refcount);
+    //dprintf("> "); VarpEntry_print(z);
+}
+
+/** Decrement reference count, freeing if zero.
+ *
+ * @param z varp entry (may be null)
+ */
+void VarpEntry_decref(VarpEntry *z){
+    if(!z) return;
+    //dprintf("> "); VarpEntry_print(z);
+    if(atomic_dec_and_test(&z->refcount)){
+        //dprintf("> freeing %p...\n", z);
+        VarpEntry_free(z);
+    }
+}
+
+/** Call the error handler.
+ * The handler is given the first queued skb, then the whole
+ * queue is purged. Does nothing if the queue is empty.
+ *
+ * @param ventry varp entry
+ */
+void VarpEntry_error(VarpEntry *ventry){
+    struct sk_buff *skb;
+    skb = skb_peek(&ventry->queue);
+    if(!skb) return;
+    if(ventry->error) ventry->error(ventry, skb);
+    skb_queue_purge(&ventry->queue);
+}
+
+/** Schedule the varp entry timer.
+ * Must increment the reference count before doing
+ * this the first time, so the ventry won't be freed
+ * before the timer goes off.
+ *
+ * @param ventry varp entry
+ */
+void VarpEntry_schedule(VarpEntry *ventry){
+    unsigned long now = jiffies;
+    ventry->timer.expires = now + VARP_PROBE_INTERVAL;
+    add_timer(&ventry->timer);
+}
+
+/** Function called when a varp entry timer goes off.
+ * If the entry is still incomplete, carries on probing.
+ * Otherwise stops probing.
+ *
+ * All entry state, including the PROBING flag, is updated while the
+ * entry lock is held. The destination mac needed for the request is
+ * copied out of the queued skb under the lock, so the skb is never
+ * touched after the lock is dropped (it may be dequeued and freed by
+ * then). When probing stops, the reference taken for the timer is
+ * dropped.
+ *
+ * @param arg ventry
+ */
+static void varp_timer_fn(unsigned long arg){
+    unsigned long flags;
+    VarpEntry *ventry = (VarpEntry *)arg;
+    struct sk_buff *skb = NULL;
+    Vmac vmac;
+    int probing = 0, solicit = 0;
+
+    dprintf(">\n"); //VarpEntry_print(ventry);
+    VarpEntry_lock(ventry, flags);
+    if(ventry->state == VARP_STATE_REACHABLE){
+        // Do nothing.
+    } else if(atomic_read(&ventry->probes) < VARP_PROBE_MAX){
+        // Probe if haven't run out of tries.
+        probing = 1;
+        VarpEntry_schedule(ventry);
+        skb = skb_peek(&ventry->queue);
+        if(skb){
+            dprintf("> skbs in queue - solicit\n");
+            atomic_inc(&ventry->probes);
+            // Copy the address now: skb is only valid under the lock.
+            vmac = *(Vmac*)MAC_ETH(skb)->h_dest;
+            solicit = 1;
+        } else {
+            dprintf("> empty queue.\n");
+        }
+    } else {
+        // Out of tries: fail.
+        dprintf("> Out of probes: FAILED\n");
+        VarpEntry_error(ventry);
+        ventry->state = VARP_STATE_FAILED;
+    }
+    VarpEntry_set_flags(ventry, VARP_FLAG_PROBING, probing);
+    VarpEntry_unlock(ventry, flags);
+    if(solicit){
+        // Same request varp_solicit() would send, without needing the skb.
+        // key.vnet is only written in VarpEntry_new(), so it is safe to read here.
+        varp_send(VARP_OP_REQUEST, NULL, NULL, ventry->key.vnet, &vmac);
+    }
+    if(!probing) VarpEntry_decref(ventry);
+    dprintf("<\n");
+}
+
+/** Default error function for varp entries.
+ * Does nothing.
+ *
+ * @param ventry varp entry
+ * @param skb packet dropped because of error
+ */
+static void varp_error_fn(VarpEntry *ventry, struct sk_buff *skb){
+}
+
+/** Create a varp entry. Initializes the internal state.
+ * The entry starts INCOMPLETE with a reference count of 1, an empty
+ * queue and an initialized (but not scheduled) timer.
+ * NOTE(review): probes, flags and addr are not set here - presumably
+ * ALLOCATE() returns zeroed memory; confirm.
+ *
+ * @param vnet vnet id
+ * @param vmac virtual MAC address (copied)
+ * @return ventry or null
+ */
+VarpEntry * VarpEntry_new(u32 vnet, Vmac *vmac){
+    VarpEntry *z = ALLOCATE(VarpEntry);
+    if(z){
+        unsigned long now = jiffies;
+
+        atomic_set(&z->refcount, 1);
+        z->lock = RW_LOCK_UNLOCKED;
+        z->state = VARP_STATE_INCOMPLETE;
+        z->queue_max = VARP_QUEUE_MAX;
+        skb_queue_head_init(&z->queue);
+        init_timer(&z->timer);
+        z->timer.data = (unsigned long)z;
+        z->timer.function = varp_timer_fn;
+        z->timestamp = now;
+        z->error = varp_error_fn;
+
+        z->key.vnet = vnet;
+        z->key.vmac = *vmac;
+    }
+    return z;
+}
+
+/** Hash function for keys in the varp cache.
+ * Hashes the vnet id and mac.
+ * The mac bytes are widened to unsigned long before shifting, so a
+ * byte >= 0x80 is never shifted into the sign bit of an int.
+ *
+ * @param k key (VarpKey)
+ * @return hashcode
+ */
+Hashcode varp_key_hash_fn(void *k){
+    VarpKey *key = k;
+    Hashcode h;
+    h = hash_2ul(key->vnet,
+                 ((unsigned long)key->vmac.mac[0] << 24) |
+                 ((unsigned long)key->vmac.mac[1] << 16) |
+                 ((unsigned long)key->vmac.mac[2] <<  8) |
+                 ((unsigned long)key->vmac.mac[3]      ));
+    h = hash_hul(h,
+                 ((unsigned long)key->vmac.mac[4] << 8) |
+                 ((unsigned long)key->vmac.mac[5]     ));
+    return h;
+}
+
+/** Test equality for keys in the varp cache.
+ * Compares vnet and mac.
+ *
+ * @param k1 key to compare (VarpKey)
+ * @param k2 key to compare (VarpKey)
+ * @return 1 if equal, 0 otherwise
+ */
+int varp_key_equal_fn(void *k1, void *k2){
+    VarpKey *key1 = k1;
+    VarpKey *key2 = k2;
+    return (key1->vnet == key2->vnet)
+        && (memcmp(key1->vmac.mac, key2->vmac.mac, ETH_ALEN) == 0);
+}
+
+/** Free an entry in the varp cache.
+ * Drops the table's reference on the varp entry and frees the
+ * hash table entry itself.
+ *
+ * @param table containing table
+ * @param entry entry to free (may be null)
+ */
+static void varp_entry_free_fn(HashTable *table, HTEntry *entry){
+    VarpEntry *ventry;
+    if(!entry) return;
+    ventry = entry->value;
+    if(ventry) VarpEntry_decref(ventry);
+    HTEntry_free(entry);
+}
+
+/** Free the whole varp cache.
+ * Dangerous.
+ * Cancels the sweep timer, frees the hash table (if any) and then
+ * the table itself.
+ * NOTE(review): del_timer() does not wait for a handler that is
+ * already running on another cpu - confirm whether del_timer_sync()
+ * is needed here.
+ *
+ * @param z varp cache (may be null)
+ */
+void VarpTable_free(VarpTable *z){
+    unsigned long flags;
+    if(!z) return;
+    VarpTable_write_lock(z, flags);
+    del_timer(&z->timer);
+    z->timer.data = 0;
+    if(z->table) HashTable_free(z->table);
+    VarpTable_write_unlock(z, flags);
+    deallocate(z);
+}
+
+/** Schedule the varp table timer.
+ * The timer is set to expire VARP_ENTRY_TTL jiffies from now.
+ *
+ * @param z varp table
+ */
+void VarpTable_schedule(VarpTable *z){
+    unsigned long now = jiffies;
+    z->timer.expires = now + VARP_ENTRY_TTL;
+    add_timer(&z->timer);
+}
+
+/** Function called when the varp table timer goes off.
+ * Sweeps old varp cache entries and reschedules itself.
+ *
+ * @param arg varp table
+ */
+static void varp_table_timer_fn(unsigned long arg){
+    VarpTable *z = (VarpTable *)arg;
+    //dprintf("> z=%p\n", z);
+    if(z){
+        VarpTable_sweep(z, 0);
+        VarpTable_schedule(z);
+    }
+    //dprintf("<\n");
+}
+
+/** Print a varp table.
+ * Prints every entry of the table passed in, holding the table read
+ * lock for the walk and each entry's lock while it is printed.
+ *
+ * @param z table
+ */
+void VarpTable_print(VarpTable *z){
+    HashTable_for_decl(entry);
+    VarpEntry *ventry;
+    unsigned long flags, vflags;
+
+    //dprintf(">\n");
+    VarpTable_read_lock(z, flags);
+    // Walk the table we were given, not the global one.
+    HashTable_for_each(entry, z->table){
+        ventry = entry->value;
+        VarpEntry_lock(ventry, vflags);
+        VarpEntry_print(ventry);
+        VarpEntry_unlock(ventry, vflags);
+    }
+    VarpTable_read_unlock(z, flags);
+    //dprintf("<\n");
+}
+
+/** Create a varp table.
+ * The lock and timer are initialized before anything that can fail,
+ * because the error path calls VarpTable_free(), which takes the lock
+ * and deletes the timer. On success the sweep timer is running.
+ *
+ * @return new table or null
+ */
+VarpTable * VarpTable_new(void){
+    int err = -ENOMEM;
+    VarpTable *z = NULL;
+
+    z = ALLOCATE(VarpTable);
+    if(!z) goto exit;
+    init_MUTEX(&z->lock);
+    init_timer(&z->timer);
+    z->timer.data = (unsigned long)z;
+    z->timer.function = varp_table_timer_fn;
+    z->table = HashTable_new(VARP_TABLE_BUCKETS);
+    if(!z->table) goto exit;
+    z->table->key_equal_fn = varp_key_equal_fn;
+    z->table->key_hash_fn = varp_key_hash_fn;
+    z->table->entry_free_fn = varp_entry_free_fn;
+    VarpTable_schedule(z);
+    err = 0;
+  exit:
+    if(err){
+        VarpTable_free(z);
+        z = NULL;
+    }
+    return z;
+}
+
+/** Add a new entry to the varp table.
+ *
+ * The entry is created with one reference, which is owned by the
+ * table. A second reference is taken for the caller while the table
+ * lock is still held, so the entry cannot be released between being
+ * added and being returned. The caller must VarpEntry_decref() it.
+ *
+ * @param z table
+ * @param vnet vnet id
+ * @param vmac virtual MAC address (copied)
+ * @return new entry or null
+ */
+VarpEntry * VarpTable_add(VarpTable *z, u32 vnet, Vmac *vmac){
+    int err = -ENOMEM;
+    VarpEntry *ventry;
+    HTEntry *entry;
+    unsigned long flags;
+
+    ventry = VarpEntry_new(vnet, vmac);
+    if(!ventry) goto exit;
+    //dprintf("> "); VarpEntry_print(ventry);
+    VarpTable_write_lock(z, flags);
+    entry = HashTable_add(z->table, ventry, ventry);
+    if(entry) VarpEntry_incref(ventry);
+    VarpTable_write_unlock(z, flags);
+    if(!entry) goto exit;
+    err = 0;
+  exit:
+    if(err){
+        VarpEntry_free(ventry);
+        ventry = NULL;
+    }
+    return ventry;
+}
+
+/** Remove an entry from the varp table.
+ * NOTE(review): the table lock is not taken here - presumably the
+ * caller is expected to hold it; confirm.
+ *
+ * @param z table
+ * @param ventry entry to remove
+ * @return removed count
+ */
+int VarpTable_remove(VarpTable *z, VarpEntry *ventry){
+    return HashTable_remove(z->table, ventry);
+}
+
+/** Lookup an entry in the varp table.
+ * The reference for the caller is taken while the table read lock is
+ * held. The caller must VarpEntry_decref() the result.
+ *
+ * @param z table
+ * @param vnet vnet id
+ * @param vmac virtual MAC address
+ * @return entry found or null
+ */
+VarpEntry * VarpTable_lookup(VarpTable *z, u32 vnet, Vmac *vmac){
+    unsigned long flags;
+    VarpKey key = { .vnet = vnet, .vmac = *vmac };
+    VarpEntry *ventry;
+    VarpTable_read_lock(z, flags);
+    ventry = HashTable_get(z->table, &key);
+    if(ventry) VarpEntry_incref(ventry);
+    VarpTable_read_unlock(z, flags);
+    return ventry;
+}
+
+/** Handle output for a reachable ventry.
+ * Send the skb using the tunnel to the care-of address.
+ * Unlocks the entry around the tunnel send and locks it again
+ * afterwards, so it expects to be called with the entry locked.
+ * NOTE(review): the unlock uses a local flags value of 0 rather than
+ * the caller's saved flags - confirm this is safe for the lock macros.
+ *
+ * @param ventry varp entry
+ * @param skb skb to send
+ * @return 0 on success, error code otherwise
+ */
+int VarpEntry_send(VarpEntry *ventry, struct sk_buff *skb){
+    int err = 0;
+    unsigned long flags = 0;
+    u32 addr;
+
+    dprintf("> skb=%p\n", skb);
+    addr = ventry->addr;
+    VarpEntry_unlock(ventry, flags);
+    err = vnet_tunnel_send(ventry->key.vnet, addr, skb);
+    VarpEntry_lock(ventry, flags);
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Handle output for a non-reachable ventry.
Send messages to complete it.
+ * If the entry is still incomplete, queue the skb, otherwise
+ * send it. If the queue is full, dequeue and free an old skb to
+ * make room for the new one.
+ *
+ * The entry is put into the INCOMPLETE state and its probe count reset
+ * to 1. If it is not already probing, the probe timer is started and
+ * a reference is taken for the timer. The entry is unlocked while the
+ * request is sent and locked again afterwards, so this expects to be
+ * called with the entry locked.
+ *
+ * @param ventry varp entry
+ * @param skb skb to send
+ * @return 0 on success, error code otherwise
+ */
+int VarpEntry_resolve(VarpEntry *ventry, struct sk_buff *skb){
+    int err = 0;
+    unsigned long flags = 0;
+
+    dprintf("> skb=%p\n", skb); //VarpEntry_print(ventry);
+    ventry->state = VARP_STATE_INCOMPLETE;
+    atomic_set(&ventry->probes, 1);
+    if(!VarpEntry_get_flags(ventry, VARP_FLAG_PROBING)){
+        VarpEntry_set_flags(ventry, VARP_FLAG_PROBING, 1);
+        VarpEntry_incref(ventry);
+        VarpEntry_schedule(ventry);
+    }
+    VarpEntry_unlock(ventry, flags);
+    varp_solicit(skb, ventry->key.vnet);
+    VarpEntry_lock(ventry, flags);
+
+    // The state may have changed while the entry was unlocked.
+    if(ventry->state == VARP_STATE_INCOMPLETE){
+        if(skb_queue_len(&ventry->queue) >= ventry->queue_max){
+            // Queue full: drop the oldest skb (the head of the queue).
+            struct sk_buff *oldskb;
+            oldskb = ventry->queue.next;
+            __skb_unlink(oldskb, &ventry->queue);
+            dprintf("> purging skb=%p\n", oldskb);
+            kfree_skb(oldskb);
+        }
+        __skb_queue_tail(&ventry->queue, skb);
+    } else {
+        err = VarpEntry_send(ventry, skb);
+    }
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Handle output for a ventry. Resolves the ventry
+ * if necessary.
+ * A REACHABLE entry sends the skb directly; any other state goes
+ * through VarpEntry_resolve().
+ *
+ * @param ventry varp entry
+ * @param skb skb to send
+ * @return 0 on success, error code otherwise
+ */
+int VarpEntry_output(VarpEntry *ventry, struct sk_buff *skb){
+    int err = 0;
+
+    switch(ventry->state){
+    case VARP_STATE_REACHABLE:
+        err = VarpEntry_send(ventry, skb);
+        break;
+    default:
+        err = VarpEntry_resolve(ventry, skb);
+        break;
+    }
+    return err;
+}
+
+/** Process the output queue for a ventry. Sends the queued skbs if
+ * the ventry is reachable, otherwise drops them.
+ *
+ * The state is re-checked before each skb, and whatever is left in
+ * the queue when the loop stops is purged.
+ *
+ * @param ventry varp entry
+ */
+void VarpEntry_process_queue(VarpEntry *ventry){
+    struct sk_buff *skb;
+    for( ; ; ){
+        if(ventry->state != VARP_STATE_REACHABLE) break;
+        skb = __skb_dequeue(&ventry->queue);
+        if(!skb) break;
+        VarpEntry_output(ventry, skb);
+    }
+    skb_queue_purge(&ventry->queue);
+}
+
+/** Update a ventry. Sets the address and state to those given
+ * and sets the timestamp to 'now'.
+ * Entries flagged PERMANENT are left unchanged. After the update the
+ * output queue is processed. Takes the entry lock itself.
+ *
+ * @param ventry varp entry
+ * @param addr care-of address
+ * @param state state
+ * @return 0 on success, error code otherwise
+ */
+int VarpEntry_update(VarpEntry *ventry, u32 addr, int state){
+    int err = 0;
+    unsigned long now = jiffies;
+    unsigned long flags;
+
+    dprintf("> addr=" IPFMT " state=%d\n", NIPQUAD(addr), state);
+    //VarpEntry_print(ventry);
+    VarpEntry_lock(ventry, flags);
+    if(VarpEntry_get_flags(ventry, VARP_FLAG_PERMANENT)) goto exit;
+    ventry->addr = addr;
+    ventry->timestamp = now;
+    ventry->state = state;
+    VarpEntry_process_queue(ventry);
+  exit:
+    //dprintf("> "); VarpEntry_print(ventry);
+    VarpEntry_unlock(ventry, flags);
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Update the entry for a vnet and vmac in a table.
+ * Looks the entry up and updates its address and state. If there is
+ * no entry and 'force' is non-zero, a new entry is added first.
+ *
+ * @param z table
+ * @param vnet vnet id
+ * @param vmac virtual MAC address
+ * @param addr care-of address
+ * @param state state
+ * @param force add the entry if it does not exist
+ * @return 0 on success, -ENOENT if there is no entry, or error code
+ */
+int VarpTable_update(VarpTable *z, int vnet, Vmac *vmac, u32 addr,
+                     int state, int force){
+    int err = 0;
+    VarpEntry *ventry;
+
+    dprintf("> vnet=%d mac=" MACFMT " addr=" IPFMT " state=%d force=%d\n",
+            vnet, MAC6TUPLE(vmac->mac), NIPQUAD(addr), state, force);
+    ventry = VarpTable_lookup(z, vnet, vmac);
+    if(force && !ventry){
+        dprintf("> No entry, adding\n");
+        ventry = VarpTable_add(z, vnet, vmac);
+    }
+    if(ventry){
+        dprintf("> Updating\n");
+        err = VarpEntry_update(ventry, addr, state);
+        // Drop the reference returned by lookup/add.
+        VarpEntry_decref(ventry);
+    } else {
+        dprintf("> No entry found\n");
+        err = -ENOENT;
+    }
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Update the ventry corresponding to the given varp header.
+ *
+ * The vnet, vmac and address are taken from the header. An entry is
+ * never created by this call.
+ *
+ * @param z table
+ * @param varph varp header
+ * @param state state
+ * @return 0 on success, -ENOENT if no entry found
+ */
+int VarpTable_update_entry(VarpTable *z, VarpHdr *varph, int state){
+    return VarpTable_update(z, ntohl(varph->vnet), &varph->vmac, varph->addr, state, 0);
+}
+
+/** Record that a vmac on a vnet is reachable at an address.
+ * Updates the global varp table, adding the entry if necessary.
+ *
+ * @param vnet vnet id
+ * @param vmac virtual MAC address (ETH_ALEN bytes)
+ * @param addr care-of address
+ * @return 0 on success, -ENOSYS if varp is not initialized, or error code
+ */
+int varp_update(int vnet, unsigned char *vmac, u32 addr){
+    if(!varp_table){
+        return -ENOSYS;
+    }
+    return VarpTable_update(varp_table, vnet, (Vmac*)vmac, addr,
+                            VARP_STATE_REACHABLE, 1);
+}
+
+/** Put old varp entries into the incomplete state.
+ * Permanent entries are not changed.
+ * If 'all' is non-zero, all non-permanent entries
+ * are put into the incomplete state, regardless of age.
+ * An entry is old if its timestamp is more than VARP_ENTRY_TTL
+ * jiffies in the past; the comparison is safe across jiffies wrap.
+ *
+ * @param z table
+ * @param all reset all entries if non-zero
+ */
+void VarpTable_sweep(VarpTable *z, int all){
+    HashTable_for_decl(entry);
+    VarpEntry *ventry;
+    unsigned long now = jiffies;
+    unsigned long old = now - VARP_ENTRY_TTL;
+    unsigned long flags, vflags;
+
+    //dprintf(">\n");
+    VarpTable_read_lock(z, flags);
+    // Walk the table we were given, not the global one.
+    HashTable_for_each(entry, z->table){
+        ventry = entry->value;
+        VarpEntry_lock(ventry, vflags);
+        // Signed difference, so the test still works when jiffies wraps.
+        if(!VarpEntry_get_flags(ventry, VARP_FLAG_PERMANENT) &&
+           (all || ((long)(ventry->timestamp - old) < 0))){
+            VarpEntry_process_queue(ventry);
+            ventry->state = VARP_STATE_INCOMPLETE;
+        }
+        VarpEntry_unlock(ventry, vflags);
+    }
+    VarpTable_read_unlock(z, flags);
+    //dprintf("<\n");
+}
+
+/** Handle a varp request. Look for a vif with the requested
+ * vnet and vmac. If find one, reply with the vnet, vmac and our
+ * address. Otherwise do nothing.
+ *
+ * @param skb incoming message
+ * @param varph varp message
+ * @return 0 if ok, -ENOENT if no matching vif, or error code
+ */
+int varp_handle_request(struct sk_buff *skb, VarpHdr *varph){
+    int err = -ENOENT;
+    u32 vnet;
+    Vmac *vmac;
+    Vif *vif = NULL;
+
+    dprintf(">\n");
+    vnet = ntohl(varph->vnet);
+    vmac = &varph->vmac;
+    dprintf("> vnet=%d vmac=" MACFMT "\n", vnet, MAC6TUPLE(vmac->mac));
+    if(vif_lookup(vnet, vmac, &vif)) goto exit;
+    // Report the result of the reply instead of the initial -ENOENT.
+    err = varp_send(VARP_OP_ANNOUNCE, skb->dev, skb, vnet, vmac);
+    vif_decref(vif);
+  exit:
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Announce the vnet and vmac of a vif (gratuitous varp).
+ *
+ * @param dev device to send on (may be null)
+ * @param vif vif
+ * @return 0 on success, -ENOSYS if varp is not initialized, or error code
+ */
+int varp_announce_vif(struct net_device *dev, Vif *vif){
+    int err = 0;
+    dprintf(">\n");
+    if(!varp_table){
+        err = -ENOSYS;
+        goto exit;
+    }
+    err = varp_send(VARP_OP_ANNOUNCE, dev, NULL, vif->vnet, &vif->vmac);
+  exit:
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Handle a varp announce message.
+ * Update the matching ventry if we have one.
+ *
+ * @param skb incoming message
+ * @param varph varp message
+ * @return 0 if OK, -ENOENT if no matching entry
+ */
+int varp_handle_announce(struct sk_buff *skb, VarpHdr *varph){
+    int err = 0;
+
+    dprintf(">\n");
+    err = VarpTable_update_entry(varp_table, varph, VARP_STATE_REACHABLE);
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Handle an incoming varp message.
+ *
+ * Once the message has passed the multicast and length checks it is
+ * treated as ours and 1 is returned, whatever the result of handling
+ * it was.
+ * NOTE(review): a message with the wrong id is also claimed (1 is
+ * returned), although the comment below says it is ignored - confirm
+ * this is intended.
+ *
+ * @param skb incoming message
+ * @return 1 if the message was claimed as varp, 0 if it was an ignored
+ * multicast, -ENOSYS if varp is not initialized, -EINVAL if too short
+ */
+int varp_handle_message(struct sk_buff *skb){
+    // Assume h. nh set, skb->data point after udp hdr (at varphdr).
+    int err = -EINVAL, mine = 0;
+    VarpHdr *varph = (void*)(skb->h.uh + 1);
+
+    dprintf(">\n");
+    if(!varp_table){
+        err = -ENOSYS;
+        goto exit;
+    }
+    if(MULTICAST(skb->nh.iph->daddr) &&
+       (skb->nh.iph->daddr != varp_mcast_addr)){
+        // Ignore multicast packets not addressed to us.
+        err = 0;
+        dprintf("> daddr=" IPFMT " mcaddr=" IPFMT "\n",
+                NIPQUAD(skb->nh.iph->daddr), NIPQUAD(varp_mcast_addr));
+        goto exit;
+    }
+    if(skb->len < sizeof(*varph)){
+        // Cast so the arguments match the %d conversions.
+        wprintf("> Varp msg too short: %d < %d\n", (int)skb->len, (int)sizeof(*varph));
+        goto exit;
+    }
+    mine = 1;
+    if(varph->id != htons(VARP_ID)){
+        // It's not varp at all - ignore it.
+        wprintf("> Unknown id: %d \n", ntohs(varph->id));
+        goto exit;
+    }
+    if(1){
+        dprintf("> saddr=" IPFMT " daddr=" IPFMT "\n",
+                NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr));
+        dprintf("> sport=%u dport=%u\n", ntohs(skb->h.uh->source), ntohs(skb->h.uh->dest));
+        dprintf("> opcode=%d vnet=%u vmac=" MACFMT " addr=" IPFMT "\n",
+                ntohs(varph->opcode),
+                ntohl(varph->vnet),
+                MAC6TUPLE(varph->vmac.mac),
+                NIPQUAD(varph->addr));
+        varp_dprint();
+    }
+    switch(ntohs(varph->opcode)){
+    case VARP_OP_REQUEST:
+        err = varp_handle_request(skb, varph);
+        break;
+    case VARP_OP_ANNOUNCE:
+        err = varp_handle_announce(skb, varph);
+        break;
+    default:
+        wprintf("> Unknown opcode: %d \n", ntohs(varph->opcode));
+        break;
+    }
+  exit:
+    if(mine) err = 1;
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Send an outgoing packet on the appropriate vnet tunnel.
+ *
+ * Packets with a multicast destination mac are sent to the varp
+ * multicast address. For any other destination the varp entry for
+ * (vnet, destination mac) is looked up, created if missing, and the
+ * packet is passed to it for output with the entry locked.
+ *
+ * @param skb outgoing message
+ * @param vnet vnet (host order)
+ * @return 0 on success, -ENOSYS if varp is not initialized, -EINVAL if
+ * the skb has no ethernet header, -ENOMEM if no entry could be created,
+ * or another error code
+ */
+int varp_output(struct sk_buff *skb, u32 vnet){
+    int err = 0;
+    unsigned char *mac = NULL;
+    Vmac *vmac = NULL;
+    VarpEntry *ventry = NULL;
+
+    dprintf("> skb=%p vnet=%u\n", skb, vnet);
+    if(!varp_table){
+        err = -ENOSYS;
+        goto exit;
+    }
+    dprintf("> skb.mac=%p\n", skb->mac.raw);
+    if(!skb->mac.raw){
+        wprintf("> No ethhdr in skb!\n");
+        err = -EINVAL;
+        goto exit;
+    }
+    mac = MAC_ETH(skb)->h_dest;
+    vmac = (Vmac*)mac;
+    if(mac_is_multicast(mac)){
+        err = vnet_tunnel_send(vnet, varp_mcast_addr, skb);
+    } else {
+        ventry = VarpTable_lookup(varp_table, vnet, vmac);
+        if(!ventry){
+            ventry = VarpTable_add(varp_table, vnet, vmac);
+        }
+        if(ventry){
+            unsigned long flags;
+            VarpEntry_lock(ventry, flags);
+            err = VarpEntry_output(ventry, skb);
+            VarpEntry_unlock(ventry, flags);
+            // Drop the reference returned by lookup/add.
+            VarpEntry_decref(ventry);
+        } else {
+            err = -ENOMEM;
+        }
+    }
+  exit:
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Set the varp multicast address (after initialization).
+ * Closes the varp sockets and reopens them with the new address.
+ *
+ * @param addr address (network order)
+ * @return 0 on success, error code otherwise
+ */
+int varp_set_mcast_addr(uint32_t addr){
+    int err = 0;
+    varp_close();
+    varp_mcast_addr = addr;
+    err = varp_open(varp_mcast_addr, varp_ucast_addr, varp_port);
+    return err;
+}
+
+/** Initialize the varp multicast address from a module parameter.
+ * Falls back to the default VARP_MCAST_ADDR if the string is null
+ * or cannot be parsed.
+ *
+ * @param s address in IPv4 notation (may be null)
+ */
+static void varp_init_mcast_addr(char *s){
+    unsigned long v = 0;
+
+    dprintf("> %s\n", s);
+    if(s && (get_inet_addr(s, &v) >= 0)){
+        varp_mcast_addr = (u32)v;
+    } else {
+        varp_mcast_addr = htonl(VARP_MCAST_ADDR);
+    }
+}
+
+/** Initialize the varp cache.
+ *
+ * Creates the varp table, works out the multicast and unicast
+ * addresses and opens the varp sockets. If any step fails the table
+ * (and its sweep timer) is released again, so a failed init leaves
+ * nothing behind.
+ *
+ * @return 0 on success, error code otherwise
+ */
+int varp_init(void){
+    int err = 0;
+    struct net_device *dev = NULL;
+
+    dprintf(">\n");
+    varp_table = VarpTable_new();
+    if(!varp_table){
+        err = -ENOMEM;
+        goto exit;
+    }
+    varp_init_mcast_addr(varp_mcaddr);
+    err = vnet_get_device(varp_device, &dev);
+    dprintf("> vnet_get_device(%s)=%d\n", varp_device, err);
+    if(err) goto exit;
+    err = vnet_get_device_address(dev, &varp_ucast_addr);
+    dprintf("> vnet_get_device_address()=%d\n", err);
+    if(err) goto exit;
+    varp_port = htons(VARP_PORT);
+
+    err = varp_open(varp_mcast_addr, varp_ucast_addr, varp_port);
+    dprintf("> varp_open()=%d\n", err);
+  exit:
+    if(dev) dev_put(dev);
+    if(err && varp_table){
+        // Same teardown as varp_exit(): clear the global before freeing.
+        VarpTable *z = varp_table;
+        varp_table = NULL;
+        VarpTable_free(z);
+    }
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Close the varp cache.
+ * Closes the varp sockets and frees the varp table, if there is one.
+ */
+void varp_exit(void){
+    dprintf(">\n");
+    varp_close();
+    if(varp_table){
+        VarpTable *z = varp_table;
+        varp_table = NULL;
+        VarpTable_free(z);
+    }
+    dprintf("<\n");
+}
+
+MODULE_PARM(varp_mcaddr, "s");
+MODULE_PARM_DESC(varp_mcaddr, "VARP multicast address");
+
+MODULE_PARM(varp_device, "s");
+MODULE_PARM_DESC(varp_device, "VARP network device");
diff --git a/tools/vnet/vnet-module/varp.h b/tools/vnet/vnet-module/varp.h
new file mode 100644
index 0000000000..4aab7fc522
--- /dev/null
+++ b/tools/vnet/vnet-module/varp.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2004 Mike Wray 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free software Foundation, Inc.,
+ * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _VNET_VARP_H
+#define _VNET_VARP_H
+
+#define CONFIG_VARP_GRATUITOUS 1
+
+struct net_device;
+struct sk_buff;
+struct Vif;
+
+/** Name of the default network device. */
+#define DEVICE "xen-br0"
+
+extern int vnet_get_device(const char *name, struct net_device **dev);
+extern int vnet_get_device_address(struct net_device *dev, u32 *addr);
+
+extern int varp_handle_message(struct sk_buff *skb);
+extern int varp_output(struct sk_buff *skb, u32 vnet);
+extern int varp_update(int vnet, unsigned char *vmac, u32 addr);
+
+extern int varp_init(void);
+extern void varp_exit(void);
+
+extern int varp_open(u32 mcaddr, u32 addr, u16 port);
+extern void varp_close(void);
+extern int varp_set_mcast_addr(u32 addr);
+
+extern void varp_print(void);
+
+extern int varp_announce_vif(struct net_device *dev, struct Vif *vif);
+//extern int varp_announce_vifs(struct net_device *dev, struct task_struct *domain);
+
+/** The varp multicast address (network order). */
+extern u32 varp_mcast_addr;
+
+
+/* MAC broadcast addr is ff-ff-ff-ff-ff-ff (all 1's).
+ * MAC multicast addr has low bit 1, i.e. 01-00-00-00-00-00.
+ */
+
+/** Test if a MAC address is a multicast or broadcast address.
+ * Tests the low bit of the first byte.
+ *
+ * @param mac address
+ * @return 1 if it is, 0 if not
+ */
+static inline int mac_is_multicast(u8 mac[ETH_ALEN]){
+    return mac[0] & 1;
+}
+
+/** Test if a MAC address is the broadcast address.
+ *
+ * @param mac address
+ * @return 1 if it is, 0 if not
+ */
+static inline int mac_is_broadcast(u8 mac[ETH_ALEN]){
+    u8 mac_bcast_val[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+    return memcmp(mac, mac_bcast_val, ETH_ALEN) == 0;
+}
+
+/** Test if a MAC address is the all-zero address.
+ *
+ * @param mac address
+ * @return 1 if it is, 0 if not
+ */
+static inline int mac_is_zero(u8 mac[ETH_ALEN]){
+    u8 mac_zero_val[ETH_ALEN] = {};
+    return memcmp(mac, mac_zero_val, ETH_ALEN) == 0;
+}
+
+/** Print format for a mac address. */
+#define MACFMT "%02x:%02x:%02x:%02x:%02x:%02x"
+
+#define MAC6TUPLE(_mac) (_mac)[0], (_mac)[1], (_mac)[2], (_mac)[3], (_mac)[4], (_mac)[5]
+
+/** Get the subnet defined by a netmask and addr.
+ *
+ * @param netmask subnet netmask
+ * @param addr subnet address
+ * @return subnet
+ */
+static inline u32 subnet_net(u32 netmask, u32 addr){
+    return netmask & addr;
+}
+
+/** Get the address within a subnet.
+ *
+ * @param netmask subnet netmask
+ * @param addr address
+ * @return address within the subnet
+ */
+static inline u32 subnet_addr(u32 netmask, u32 addr){
+    return ~netmask & addr;
+}
+
+/** Get the broadcast address for a subnet.
+ *
+ * @param netmask subnet netmask
+ * @param netaddr subnet address
+ * @return subnet broadcast address
+ */
+static inline u32 subnet_broadcast_addr(u32 netmask, u32 netaddr){
+    return subnet_net(netmask, netaddr) | ~netmask;
+}
+
+/** Test if an address corresponds to a subnet broadcast.
+ * True if the address within the subnet is all 1's (in binary).
+ * (even if the address is not in the subnet).
+ *
+ * @param netmask subnet mask
+ * @param addr address
+ * @return 1 if it does, 0 otherwise
+ */
+static inline int subnet_broadcast(u32 netmask, u32 addr){
+    return subnet_addr(netmask, addr) == (u32)~netmask;
+}
+
+/** Test if an address is in a subnet.
+ *
+ * @param netmask subnet mask
+ * @param netaddr subnet address
+ * @param addr address
+ * @return 1 if it is, 0 otherwise
+ */
+static inline int subnet_local(u32 netmask, u32 netaddr, u32 addr){
+    return subnet_net(netmask, netaddr) == subnet_net(netmask, addr);
+}
+
+#endif /* !
_VNET_VARP_H */
diff --git a/tools/vnet/vnet-module/varp_socket.c b/tools/vnet/vnet-module/varp_socket.c
new file mode 100644
index 0000000000..339d42b776
--- /dev/null
+++ b/tools/vnet/vnet-module/varp_socket.c
@@ -0,0 +1,639 @@
+/*
+ * Copyright (C) 2004 Mike Wray 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free software Foundation, Inc.,
+ * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+/* NOTE(review): the header names of the #include directives below are
+ * missing in this copy of the patch - restore them from the original.
+ */
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+/* Get macros needed to define system calls as functions in the kernel. */
+#define __KERNEL_SYSCALLS__
+static int errno;
+#include 
+
+#define MODULE_NAME "VARP"
+#define DEBUG 1
+#undef DEBUG
+#include "debug.h"
+
+// Compensate for struct sock fields having 'sk_' added
+// to them in 2.6.
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+
+#define SK_RECEIVE_QUEUE sk_receive_queue
+#define SK_SLEEP sk_sleep
+
+#else
+
+#define SK_RECEIVE_QUEUE receive_queue
+#define SK_SLEEP sleep
+
+#endif
+
+/** @file
+ * Support for the VARP udp sockets.
+ */
+
+/** Switch the address-space limit used for user-memory access checks.
+ *
+ * @param fs new segment to install
+ * @return the previous segment, so the caller can restore it
+ */
+static inline mm_segment_t change_fs(mm_segment_t fs){
+    mm_segment_t oldfs = get_fs();
+    set_fs(fs);
+    return oldfs;
+}
+
+/* Replicate the user-space socket API.
+ * The parts we need anyway.
+ */
+
+/* Define the socketcall() syscall.
+ * Multiplexes all the socket-related calls.
+ *
+ * @param call socket call id
+ * @param args arguments (upto 6)
+ * @return call-dependent value
+ */
+static inline _syscall2(int, socketcall,
+                        int, call,
+                        unsigned long *, args)
+
+/** Kernel-side socket(): create a socket.
+ *
+ * @param family address family
+ * @param type socket type
+ * @param protocol protocol
+ * @return result of the SYS_SOCKET socketcall
+ */
+int socket(int family, int type, int protocol){
+    unsigned long a[6] = {
+        (unsigned long)family,
+        (unsigned long)type,
+        (unsigned long)protocol,
+    };
+    return socketcall(SYS_SOCKET, a);
+}
+
+/** Kernel-side bind(): bind a socket to an address.
+ *
+ * @param fd socket
+ * @param umyaddr address
+ * @param addrlen address length
+ * @return result of the SYS_BIND socketcall
+ */
+int bind(int fd, struct sockaddr *umyaddr, int addrlen){
+    unsigned long a[6] = {
+        (unsigned long)fd,
+        (unsigned long)umyaddr,
+        (unsigned long)addrlen,
+    };
+    return socketcall(SYS_BIND, a);
+}
+
+/** Kernel-side connect(): connect a socket to an address.
+ *
+ * @param fd socket
+ * @param uservaddr address
+ * @param addrlen address length
+ * @return result of the SYS_CONNECT socketcall
+ */
+int connect(int fd, struct sockaddr *uservaddr, int addrlen){
+    unsigned long a[6] = {
+        (unsigned long)fd,
+        (unsigned long)uservaddr,
+        (unsigned long)addrlen,
+    };
+    return socketcall(SYS_CONNECT, a);
+}
+
+/** Kernel-side sendto(): send a buffer to an address.
+ *
+ * @param fd socket
+ * @param buff data
+ * @param len data length
+ * @param flags send flags
+ * @param addr destination address
+ * @param addr_len destination address length
+ * @return result of the SYS_SENDTO socketcall
+ */
+int sendto(int fd, void * buff, size_t len,
+           unsigned flags, struct sockaddr *addr,
+           int addr_len){
+    unsigned long a[6] = {
+        (unsigned long)fd,
+        (unsigned long)buff,
+        (unsigned long)len,
+        (unsigned long)flags,
+        (unsigned long)addr,
+        (unsigned long)addr_len,
+    };
+    return socketcall(SYS_SENDTO, a);
+}
+
+/** Kernel-side recvfrom(): receive into a buffer.
+ *
+ * @param fd socket
+ * @param ubuf buffer
+ * @param size buffer size
+ * @param flags receive flags
+ * @param addr return parameter for the source address
+ * @param addr_len in/out parameter for the source address length
+ * @return result of the SYS_RECVFROM socketcall
+ */
+int recvfrom(int fd, void * ubuf, size_t size,
+             unsigned flags, struct sockaddr *addr,
+             int *addr_len){
+    unsigned long a[6] = {
+        (unsigned long)fd,
+        (unsigned long)ubuf,
+        (unsigned long)size,
+        (unsigned long)flags,
+        (unsigned long)addr,
+        (unsigned long)addr_len,
+    };
+    return socketcall(SYS_RECVFROM, a);
+}
+
+/** Kernel-side setsockopt(): set a socket option.
+ *
+ * @param fd socket
+ * @param level option level
+ * @param optname option name
+ * @param optval option value
+ * @param optlen option value length
+ * @return result of the SYS_SETSOCKOPT socketcall
+ */
+int setsockopt(int fd, int level, int optname, void *optval, int optlen){
+    unsigned long a[6] = {
+        (unsigned long)fd,
+        (unsigned long)level,
+        (unsigned long)optname,
+        (unsigned long)optval,
+        (unsigned long)optlen,
+    };
+    return socketcall(SYS_SETSOCKOPT, a);
+}
+
+/** Kernel-side getsockopt(): get a socket option.
+ *
+ * @param fd socket
+ * @param level option level
+ * @param optname option name
+ * @param optval return buffer for the option value
+ * @param optlen in/out parameter for the value length
+ * @return result of the SYS_GETSOCKOPT socketcall
+ */
+int getsockopt(int fd, int level, int optname, void *optval, int *optlen){
+    unsigned long a[6] = {
+        (unsigned long)fd,
+        (unsigned long)level,
+        (unsigned long)optname,
+        (unsigned long)optval,
+        (unsigned long)optlen,
+    };
+    return socketcall(SYS_GETSOCKOPT, a);
+}
+
+/** Kernel-side shutdown(): shut down part of a connection.
+ *
+ * @param fd socket
+ * @param how which directions to shut down
+ * @return result of the SYS_SHUTDOWN socketcall
+ */
+int shutdown(int fd, int how){
+    unsigned long a[6] = {
+        (unsigned long)fd,
+        (unsigned long)how,
+    };
+    return socketcall(SYS_SHUTDOWN, a);
+}
+
+/** Kernel-side getsockname(): get the local address of a socket.
+ *
+ * @param fd socket
+ * @param usockaddr return buffer for the address
+ * @param usockaddr_len in/out parameter for the address length
+ * @return result of the SYS_GETSOCKNAME socketcall
+ */
+int getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len){
+    unsigned long a[6] = {
+        (unsigned long)fd,
+        (unsigned long)usockaddr,
+        (unsigned long)usockaddr_len,
+    };
+    return socketcall(SYS_GETSOCKNAME, a);
+}
+
+/*============================================================================*/
+/** Socket flags. */
+enum {
+    VSOCK_REUSE = 1,
+    VSOCK_BIND = 2,
+    VSOCK_CONNECT = 4,
+    VSOCK_BROADCAST = 8,
+    VSOCK_MULTICAST = 16,
+    };
+
+/** Convert socket flags to a string.
+ * One character per flag, in the order connect, bind, reuse,
+ * broadcast, multicast; '-' where the flag is clear.
+ * The result lives in a static buffer that the next call overwrites.
+ *
+ * @param flags flags
+ * @return static string
+ */
+char * socket_flags(int flags){
+    static char s[6];
+    char *p = s;
+    *p++ = ((flags & VSOCK_CONNECT) ? 'c' : '-');
+    *p++ = ((flags & VSOCK_BIND) ? 'b' : '-');
+    *p++ = ((flags & VSOCK_REUSE) ? 'r' : '-');
+    *p++ = ((flags & VSOCK_BROADCAST) ? 'B' : '-');
+    *p++ = ((flags & VSOCK_MULTICAST) ? 'M' : '-');
+    *p = '\0';
+    return s;
+}
+
+/** The varp multicast socket. */
+int varp_mcast_sock = -1;
+
+/** The varp unicast socket. */
+int varp_ucast_sock = -1;
+
+/** Control flag for whether varp should be running.
+ * If this is set 0 then the varp thread will notice and
+ * (eventually) exit. This is indicated by setting varp_running
+ * to 0.
+ */
+atomic_t varp_run = ATOMIC_INIT(0);
+
+/** State flag indicating whether the varp thread is running. */
+atomic_t varp_running = ATOMIC_INIT(0);
+
+/** Set socket option to reuse address.
+ *
+ * @param sock socket
+ * @param reuse flag
+ * @return 0 on success, error code otherwise
+ */
+int setsock_reuse(int sock, int reuse){
+    int err = 0;
+    err = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse));
+    if(err < 0){
+        eprintf("> setsockopt SO_REUSEADDR: %d %d\n", err, errno);
+    }
+    return err;
+}
+
+/** Set socket broadcast option.
+ *
+ * @param sock socket
+ * @param bcast flag
+ * @return 0 on success, error code otherwise
+ */
+int setsock_broadcast(int sock, int bcast){
+    int err = 0;
+    err = setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &bcast, sizeof(bcast));
+    if(err < 0){
+        eprintf("> setsockopt SO_BROADCAST: %d %d\n", err, errno);
+    }
+    return err;
+}
+
+/** Join a socket to a multicast group.
+ * Multicast loopback is turned off and the group is joined on any
+ * interface. The device lookup is only used for the debug output;
+ * the device reference it returns is released before returning.
+ * Failures are logged but not reported: the function currently
+ * always returns 0 (see the 'todo' below).
+ *
+ * @param sock socket
+ * @param saddr multicast address
+ * @return 0
+ */
+int setsock_multicast(int sock, uint32_t saddr){
+    int err = 0;
+    struct net_device *dev = NULL;
+    u32 addr = 0;
+    struct ip_mreqn mreq = {};
+    int mloop = 0;
+
+    err = vnet_get_device(DEVICE, &dev);
+    if(err){
+        eprintf("> error getting device: %d %d\n", err, errno);
+        goto exit;
+    }
+    err = vnet_get_device_address(dev, &addr);
+    if(err){
+        eprintf("> error getting device address: %d %d\n", err, errno);
+        goto exit;
+    }
+    // See 'man 7 ip' for these options.
+    mreq.imr_multiaddr.s_addr = saddr;    // IP multicast address.
+    //mreq.imr_address.s_addr = addr;     // Interface IP address.
+    mreq.imr_address.s_addr = INADDR_ANY; // Interface IP address.
+    mreq.imr_ifindex = 0;                 // Interface index (0 means any).
+    dprintf("> saddr=%u.%u.%u.%u addr=%u.%u.%u.%u ifindex=%d\n",
+            NIPQUAD(saddr), NIPQUAD(addr), mreq.imr_ifindex);
+    err = setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &mloop, sizeof(mloop));
+    if(err < 0){
+        eprintf("> setsockopt IP_MULTICAST_LOOP: %d %d\n", err, errno);
+        goto exit;
+    }
+    err = setsockopt(sock, SOL_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
+    if(err < 0){
+        eprintf("> setsockopt IP_ADD_MEMBERSHIP: %d %d\n", err, errno);
+        goto exit;
+    }
+  exit:
+    // Release the reference taken by vnet_get_device().
+    if(dev) dev_put(dev);
+    err = 0; //todo: remove hack
+    return err;
+}
+
+/** Set a socket's multicast ttl (default is 1).
+ * @param sock socket
+ * @param ttl ttl
+ * @return 0 on success, error code otherwise
+ */
+int setsock_multicast_ttl(int sock, uint8_t ttl){
+    int err = 0;
+    err = setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl));
+    return err;
+}
+
+/** Create a socket.
+ * The flags can include VSOCK_REUSE, VSOCK_BROADCAST, VSOCK_MULTICAST,
+ * VSOCK_CONNECT and VSOCK_BIND. A failure to create the socket is
+ * reported as an error, and 'val' is set to -1 on any error.
+ * NOTE(review): if an option, connect or bind fails, the socket that
+ * was created is not released here - confirm who should clean it up.
+ *
+ * @param socktype socket type
+ * @param saddr address
+ * @param port port
+ * @param flags flags
+ * @param val return value for the socket connection
+ * @return 0 on success, error code otherwise
+ */
+int create_socket(int socktype, uint32_t saddr, uint32_t port, int flags, int *val){
+    int err = 0;
+    int sock = -1;
+    struct sockaddr_in addr_in = {};
+    struct sockaddr *addr = (struct sockaddr *)&addr_in;
+    int addr_n = sizeof(addr_in);
+    int reuse, bcast;
+    int sockproto = 0;
+
+    //dprintf(">\n");
+    reuse = (flags & VSOCK_REUSE);
+    bcast = (flags & VSOCK_BROADCAST);
+    addr_in.sin_family = AF_INET;
+    addr_in.sin_addr.s_addr = saddr;
+    addr_in.sin_port = port;
+    dprintf("> flags=%s addr=%u.%u.%u.%u port=%d\n",
+            socket_flags(flags),
+            NIPQUAD(saddr), ntohs(port));
+
+    switch(socktype){
+    case SOCK_DGRAM: sockproto = IPPROTO_UDP; break;
+    case SOCK_STREAM: sockproto = IPPROTO_TCP; break;
+    }
+    sock = socket(AF_INET, socktype, sockproto);
+    if(sock < 0){
+        // Report the failure instead of returning 0 with a bad socket.
+        err = sock;
+        goto exit;
+    }
+    if(reuse){
+        err = setsock_reuse(sock, reuse);
+        if(err < 0) goto exit;
+    }
+    if(bcast){
+        err = setsock_broadcast(sock, bcast);
+        if(err < 0) goto exit;
+    }
+    if(flags & VSOCK_MULTICAST){
+        err = setsock_multicast(sock, saddr);
+        if(err < 0) goto exit;
+    }
+    if(flags & VSOCK_CONNECT){
+        err = connect(sock, addr, addr_n);
+        if(err < 0) goto exit;
+    }
+    if(flags & VSOCK_BIND){
+        err = bind(sock, addr, addr_n);
+        if(err < 0) goto exit;
+    }
+  exit:
+    *val = (err ? -1 : sock);
+    if(err) eprintf("> err=%d errno=%d\n", err, errno);
+    return err;
+}
+
+/** Open the varp multicast socket.
+ * The socket is created with the reuse, multicast and broadcast flags;
+ * if mcaddr is a multicast address its multicast ttl is set to 1.
+ *
+ * @param mcaddr multicast address
+ * @param saddr address
+ * @param port port
+ * @param val return parameter for the socket (-1 on error)
+ * @return 0 on success, error code otherwise
+ */
+int varp_mcast_open(uint32_t mcaddr, uint32_t saddr, uint16_t port, int *val){
+    int err = 0;
+    int flags = VSOCK_REUSE;
+    int multicast = MULTICAST(mcaddr);
+    int sock = -1;
+    struct sockaddr_in addr_in;
+    struct sockaddr *addr = (struct sockaddr *)&addr_in;
+    int addr_n = sizeof(addr_in);
+
+    dprintf(">\n");
+    flags |= VSOCK_MULTICAST;
+    flags |= VSOCK_BROADCAST;
+
+    err = create_socket(SOCK_DGRAM, mcaddr, port, flags, &sock);
+    if(err < 0) goto exit;
+    if(multicast){
+        err = setsock_multicast_ttl(sock, 1);
+        if(err < 0) goto exit;
+    }
+    if(0){
+        addr_in.sin_family = AF_INET;
+        addr_in.sin_addr.s_addr = saddr;
+        addr_in.sin_port = port;
+        err = bind(sock, addr, addr_n);
+        if(err < 0){
+            eprintf("> bind: %d %d\n", err, errno);
+            goto exit;
+        }
+    }
+    if(0){
+        struct sockaddr_in self = {};
+        int self_n = sizeof(self);
+        getsockname(sock, (struct sockaddr *)&self, &self_n);
+        dprintf("> sockname sock=%d addr=%u.%u.%u.%u port=%d\n",
+                sock, NIPQUAD(saddr), ntohs(port));
+    }
+  exit:
+    // Only shut down a socket that was actually created.
+    if(err && sock >= 0){
+        shutdown(sock, 2);
+    }
+    *val = (err ? -1 : sock);
+    dprintf("< err=%d val=%d\n", err, *val);
+    return err;
+}
+
+/** Open the varp unicast socket.
+ * + * @param addr address + * @param port port + * @param val return parameter for the socket + * @return 0 on success, error code otherwise + */ +int varp_ucast_open(uint32_t addr, u16 port, int *val){ + int err = 0; + int flags = VSOCK_BIND | VSOCK_REUSE; + dprintf(">\n"); + err = create_socket(SOCK_DGRAM, addr, port, flags, val); + dprintf("< err=%d val=%d\n", err, *val); + return err; +} + +/* Here because inline in 'socket.c'. */ +#ifndef sockfd_put +#define sockfd_put(sock) fput((sock)->file) +#endif + +/** Get the next skb from a socket's receive queue. + * + * @param fd socket file descriptor + * @return skb or NULL + */ +static struct sk_buff *get_sock_skb(int fd){ + int err = 0; + struct sk_buff *skb = NULL; + struct socket *sock = NULL; + + sock = sockfd_lookup(fd, &err); + if (!sock){ + dprintf("> no sock for fd=%d\n", fd); + goto exit; + } + skb = skb_dequeue(&sock->sk->SK_RECEIVE_QUEUE); + //skb = skb_recv_datagram(sock->sk, 0, 1, &recv_err); + sockfd_put(sock); + exit: + return skb; +} + +/** Handle the next skb on a socket (if any). + * + * @param fd socket file descriptor + * @return 1 if there was an skb, 0 otherwise + */ +static int handle_sock_skb(int fd){ + int ret = 0; + struct sk_buff *skb = get_sock_skb(fd); + if(skb){ + ret = 1; + dprintf("> skb fd=%d skb=%p\n", fd, skb); + varp_handle_message(skb); + kfree_skb(skb); + } + return ret; +} + +/** Add a wait queue to a socket. + * + * @param fd socket file descriptor + * @param waitq queue + * @return 0 on success, error code otherwise + */ +int sock_add_wait_queue(int fd, wait_queue_t *waitq){ + int err = 0; + struct socket *sock = NULL; + + dprintf("> fd=%d\n", fd); + sock = sockfd_lookup(fd, &err); + if (!sock) goto exit; + add_wait_queue(sock->sk->SK_SLEEP, waitq); + sockfd_put(sock); + exit: + dprintf("< err=%d\n", err); + return err; +} + +/** Remove a wait queue from a socket. 
+ * + * @param fd socket file descriptor + * @param waitq queue + * @return 0 on success, error code otherwise + */ +int sock_remove_wait_queue(int fd, wait_queue_t *waitq){ + int err = 0; + struct socket *sock = NULL; + + sock = sockfd_lookup(fd, &err); + if (!sock) goto exit; + remove_wait_queue(sock->sk->SK_SLEEP, waitq); + sockfd_put(sock); + exit: + return err; +} + +/** Loop handling the varp sockets. + * We use kernel API for this (waitqueue, schedule_timeout) instead + * of select because the select syscall was returning EFAULT. Oh well. + * + * @param arg arguments + * @return exit code + */ +int varp_main(void *arg){ + int err = 0; + long timeout = 3 * HZ; + int count = 0; + int n = 0; + DECLARE_WAITQUEUE(mcast_wait, current); + DECLARE_WAITQUEUE(ucast_wait, current); + + dprintf("> start\n"); + atomic_set(&varp_running, 1); + err = sock_add_wait_queue(varp_mcast_sock, &mcast_wait); + err = sock_add_wait_queue(varp_ucast_sock, &ucast_wait); + for(n = 1; atomic_read(&varp_run) == 1; n++){ + //dprintf("> n=%d\n", n); + count = 0; + count += handle_sock_skb(varp_mcast_sock); + count += handle_sock_skb(varp_ucast_sock); + if(!count){ + // No skbs were handled, so go back to sleep. + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(timeout); + current->state = TASK_RUNNING; + } + } + sock_remove_wait_queue(varp_mcast_sock, &mcast_wait); + sock_remove_wait_queue(varp_ucast_sock, &ucast_wait); + atomic_set(&varp_running, 0); + //MOD_DEC_USE_COUNT; + dprintf("< stop err=%d\n", err); + return err; +} + +/** Start the varp thread. 
+ * + * @return 0 on success, error code otherwise + */ +int varp_start(void){ + int err = 0; + void *args = NULL; + int flags = 0; + long pid = 0; + + dprintf(">\n"); + //flags |= CLONE_VM; + flags |= CLONE_FS; + flags |= CLONE_FILES; + flags |= CLONE_SIGHAND; + atomic_set(&varp_run, 1); + atomic_set(&varp_running, 0); + pid = kernel_thread(varp_main, args, flags); + dprintf("< pid=%ld\n", pid); + return err; +} + +/** Close the varp sockets and stop the thread handling them. + */ +void varp_close(void){ + mm_segment_t oldfs; + long timeout = 1 * HZ; + int tries = 10; + dprintf(">\n"); + // Tell the varp thread to stop and wait a while for it. + atomic_set(&varp_run, 0); + while(atomic_read(&varp_running) && tries-- > 0){ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(timeout); + current->state = TASK_RUNNING; + } + // Close the sockets. + oldfs = change_fs(KERNEL_DS); + if(varp_mcast_sock > 0){ + shutdown(varp_mcast_sock, 2); + varp_mcast_sock = -1; + } + if(varp_ucast_sock > 0){ + shutdown(varp_ucast_sock, 2); + varp_ucast_sock = -1; + } + set_fs(oldfs); + //MOD_DEC_USE_COUNT; + dprintf("<\n"); +} + +/** Open the varp sockets and start the thread handling them. 
+ * + * @param mcaddr multicast address + * @param addr unicast address + * @param port port + * @return 0 on success, error code otherwise + */ +int varp_open(u32 mcaddr, u32 addr, u16 port){ + int err = 0; + mm_segment_t oldfs; + + //MOD_INC_USE_COUNT; + dprintf("> mcaddr=%u.%u.%u.%u addr=%u.%u.%u.%u port=%u\n", + NIPQUAD(mcaddr), NIPQUAD(addr), ntohs(port)); + //MOD_INC_USE_COUNT; + oldfs = change_fs(KERNEL_DS); + err = varp_mcast_open(mcaddr, addr, port, &varp_mcast_sock); + if(err < 0 ) goto exit; + err = varp_ucast_open(INADDR_ANY, port, &varp_ucast_sock); + if(err < 0 ) goto exit; + set_fs(oldfs); + err = varp_start(); + exit: + set_fs(oldfs); + if(err){ + varp_close(); + } + dprintf("< err=%d\n", err); + return err; +} + diff --git a/tools/vnet/vnet-module/vif.c b/tools/vnet/vnet-module/vif.c new file mode 100644 index 0000000000..43d864c1a1 --- /dev/null +++ b/tools/vnet/vnet-module/vif.c @@ -0,0 +1,267 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include "allocate.h" +#include "hash_table.h" +#include "sys_net.h" +#include "sys_string.h" + +#define MODULE_NAME "VNET" +#define DEBUG 1 +#undef DEBUG +#include "debug.h" + +/** Table of vifs indexed by VifKey. */ +HashTable *vif_table = NULL; + +void vif_decref(Vif *vif){ + if(!vif) return; + if(atomic_dec_and_test(&vif->refcount)){ + kfree(vif); + } +} + +void vif_incref(Vif *vif){ + if(!vif) return; + atomic_inc(&vif->refcount); +} + +/** Hash function for keys in the vif table. + * Hashes the vnet id and mac. + * + * @param k key (VifKey) + * @return hashcode + */ +Hashcode vif_key_hash_fn(void *k){ + VifKey *key = k; + Hashcode h; + h = hash_2ul(key->vnet, + (key->vmac.mac[0] << 24) | + (key->vmac.mac[1] << 16) | + (key->vmac.mac[2] << 8) | + (key->vmac.mac[3] )); + h = hash_hul(h, + (key->vmac.mac[4] << 8) | + (key->vmac.mac[5] )); + return h; +} + + +/** Test equality for keys in the vif table. + * Compares vnet and mac. + * + * @param k1 key to compare (VifKey) + * @param k2 key to compare (VifKey) + * @return 1 if equal, 0 otherwise + */ +int vif_key_equal_fn(void *k1, void *k2){ + VifKey *key1 = k1; + VifKey *key2 = k2; + return (key1->vnet == key2->vnet) && (memcmp(key1->vmac.mac, key2->vmac.mac, ETH_ALEN) == 0); +} + +/** Free an entry in the vif table. + * + * @param table containing table + * @param entry entry to free + */ +static void vif_entry_free_fn(HashTable *table, HTEntry *entry){ + Vif *vif; + if(!entry) return; + vif = entry->value; + if(vif){ + vif_decref(vif); + } + HTEntry_free(entry); +} + +/** Lookup a vif. 
+ * + * @param vnet vnet id + * @param mac MAC address + * @return 0 on success, -ENOENT otherwise + */ +int vif_lookup(int vnet, Vmac *vmac, Vif **vif){ + int err = 0; + VifKey key = {}; + HTEntry *entry = NULL; + + key.vnet = vnet; + key.vmac = *vmac; + entry = HashTable_get_entry(vif_table, &key); + if(entry){ + *vif = entry->value; + vif_incref(*vif); + } else { + *vif = NULL; + err = -ENOENT; + } + //dprintf("< err=%d addr=" IPFMT "\n", err, NIPQUAD(*coaddr)); + return err; +} + +/** Create a new vif. + * + * @param vnet vnet id + * @param mac MAC address + * @return 0 on success, negative error code otherwise + */ +int vif_add(int vnet, Vmac *vmac, Vif **val){ + int err = 0; + Vif *vif = NULL; + HTEntry *entry; + dprintf("> vnet=%d\n", vnet); + vif = ALLOCATE(Vif); + if(!vif){ + err = -ENOMEM; + goto exit; + } + atomic_set(&vif->refcount, 1); + vif->vnet = vnet; + vif->vmac = *vmac; + entry = HashTable_add(vif_table, vif, vif); + if(!entry){ + err = -ENOMEM; + deallocate(vif); + vif = NULL; + goto exit; + } + vif_incref(vif); + exit: + *val = (err ? NULL : vif); + dprintf("< err=%d\n", err); + return err; +} + +/** Delete an entry. 
+ * + * @param vnet vnet id + * @param mac MAC address + * @param coaddr return parameter for care-of address + * @return number of entries deleted, or negative error code + */ +int vif_remove(int vnet, Vmac *vmac){ + int err = 0; + VifKey key = { .vnet = vnet, .vmac = *vmac }; + //dprintf("> vnet=%d addr=%u.%u.%u.%u\n", vnet, NIPQUAD(coaddr)); + err = HashTable_remove(vif_table, &key); + //dprintf("< err=%d\n", err); + return err; +} + +int vif_find(int vnet, Vmac *vmac, int create, Vif **vif){ + int err = 0; + + err = vif_lookup(vnet, vmac, vif); + if(err && create){ + err = vif_add(vnet, vmac, vif); + } + return err; +} + +void vif_purge(void){ + HashTable_clear(vif_table); +} + +int vif_create(int vnet, Vmac *vmac, Vif **vif){ + int err = 0; + + dprintf(">\n"); + if(!vif_lookup(vnet, vmac, vif)){ + err = -EEXIST; + goto exit; + } + dprintf("> vif_add...\n"); + err = vif_add(vnet, vmac, vif); + exit: + if(err){ + *vif = NULL; + } + dprintf("< err=%d\n", err); + return err; +} + +/** Create a vif. + * + * @param vnet vnet id + * @param mac mac address (as a string) + * @return 0 on success, error code otherwise + */ +int mkvif(int vnet, char *mac){ + int err = 0; + Vmac vmac = {}; + Vif *vif = NULL; + dprintf("> vnet=%d mac=%s\n", vnet, mac); + err = mac_aton(mac, vmac.mac); + if(err) goto exit; + err = vif_create(vnet, &vmac, &vif); + exit: + dprintf("< err=%d\n", err); + return err; +} + +/** Initialize the vif table. + * + * @return 0 on success, error code otherwise + */ +int vif_init(void){ + int err = 0; + dprintf(">\n"); + vif_table = HashTable_new(0); + if(!vif_table){ + err = -ENOMEM; + goto exit; + } + vif_table->entry_free_fn = vif_entry_free_fn; + vif_table->key_hash_fn = vif_key_hash_fn; + vif_table->key_equal_fn = vif_key_equal_fn; + + // Some vifs for testing. 
+ //mkvif(1, "aa:00:00:00:20:11"); + //mkvif(2, "aa:00:00:00:20:12"); + exit: + if(err < 0) wprintf("< err=%d\n", err); + dprintf("< err=%d\n", err); + return err; +} + +void vif_exit(void){ + HashTable_free(vif_table); +} diff --git a/tools/vnet/vnet-module/vif.h b/tools/vnet/vnet-module/vif.h new file mode 100644 index 0000000000..379725189c --- /dev/null +++ b/tools/vnet/vnet-module/vif.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _VNET_VIF_H_ +#define _VNET_VIF_H_ + +#include +struct net_device; + +/** Key for entries in the vif table. 
*/ +typedef struct VifKey { + int vnet; + Vmac vmac; +} VifKey; + +typedef struct Vif { + int vnet; + Vmac vmac; + struct net_device *dev; + atomic_t refcount; +} Vif; + +struct HashTable; +extern struct HashTable *vif_table; + +extern void vif_decref(Vif *vif); +extern void vif_incref(Vif *vif); + +extern int vif_create(int vnet, Vmac *vmac, Vif **vif); + +extern int vif_add(int vnet, Vmac *vmac, Vif **vif); +extern int vif_lookup(int vnet, Vmac *vmac, Vif **vif); +extern int vif_remove(int vnet, Vmac *vmac); +extern int vif_find(int vnet, Vmac *vmac, int create, Vif **vif); +extern void vif_purge(void); + +extern int vif_init(void); +extern void vif_exit(void); + +#endif diff --git a/tools/vnet/vnet-module/vnet.c b/tools/vnet/vnet-module/vnet.c new file mode 100644 index 0000000000..6027cd6604 --- /dev/null +++ b/tools/vnet/vnet-module/vnet.c @@ -0,0 +1,767 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "allocate.h" +#include "hash_table.h" +#include "sys_net.h" +#include "sys_string.h" + +#define MODULE_NAME "VNET" +#define DEBUG 1 +#undef DEBUG +#include "debug.h" + +/** Default vnet security level. + */ +int vnet_security_default = SA_AUTH ; //| SA_CONF; + +/** Key for entries in the vnet address table. */ +typedef struct VnetAddrKey { + /** Vnet id. */ + int vnet; + /** MAC address. */ + unsigned char mac[ETH_ALEN]; +} VnetAddrKey; + +/** The physical vnet. */ +Vnet *vnet_physical = NULL; + +/** Table of vnets indexed by id. */ +static HashTable *vnet_table = NULL; + +/** Decrement reference count, freeing if zero. + * + * @param info vnet (OK if null) + */ +void Vnet_decref(Vnet *info){ + if(!info) return; + if(atomic_dec_and_test(&info->refcount)){ + dprintf("> free vnet=%u\n", info->vnet); + vnet_dev_remove(info); + deallocate(info); + } +} + +/** Increment reference count. + * + * @param info vnet (OK if null) + */ +void Vnet_incref(Vnet *info){ + if(!info) return; + atomic_inc(&info->refcount); +} + +/** Allocate a vnet, setting reference count to 1. + * + * @param info return parameter for vnet + * @return 0 on success, error code otherwise + */ +int Vnet_alloc(Vnet **info){ + int err = 0; + *info = ALLOCATE(Vnet); + if(*info){ + atomic_set(&(*info)->refcount, 1); + } else { + err = -ENOMEM; + } + return err; +} + +/** Add a vnet to the table under its vnet id. 
+ * + * @param info vnet to add + * @return 0 on success, error code otherwise + */ +int Vnet_add(Vnet *info){ + int err = 0; + HTEntry *entry = NULL; + // Vnet_del(info->vnet); //todo: Delete existing vnet info? + Vnet_incref(info); + entry = HashTable_add(vnet_table, HKEY(info->vnet), info); + if(!entry){ + err = -ENOMEM; + Vnet_decref(info); + } + return err; +} + +/** Remove a vnet from the table. + * + * @param vnet id of vnet to remove + * @return number of vnets removed + */ +int Vnet_del(vnetid_t vnet){ + return HashTable_remove(vnet_table, HKEY(vnet)); +} + +/** Lookup a vnet by id. + * References the vnet on success - the caller must decref. + * + * @param vnet vnet id + * @param info return parameter for vnet + * @return 0 on sucess, -ENOENT if no vnet found + */ +int Vnet_lookup(vnetid_t vnet, Vnet **info){ + int err = 0; + dprintf("> vnet=%u info=%p\n", vnet, info); + dprintf("> vnet_table=%p\n",vnet_table); + *info = HashTable_get(vnet_table, HKEY(vnet)); + if(*info){ + Vnet_incref(*info); + } else { + err = -ENOENT; + } + dprintf("< err=%d\n", err); + return err; +} + +/** Free an entry in the vnet table. + * + * @param table containing table + * @param entry to free + */ +static void vnet_entry_free_fn(HashTable *table, HTEntry *entry){ + Vnet *info; + if(!entry) return; + info = entry->value; + if(info){ + vnet_dev_remove(info); + Vnet_decref(info); + } + HTEntry_free(entry); +} + +/** Setup some vnet entries (for testing). + * Vnet 1 is physical, vnets 2 to 10 are insecure, vnets above + * 10 are secure. + * + * @return 0 on success, negative error code otherwise + */ +static int vnet_setup(void){ + int err = 0; + int i, n = 5; //20; + int security = vnet_security_default; + Vnet *vnet; + + dprintf(">\n"); + for(i=0; ivnet = VNET_VIF + i; + vnet->security = (vnet->vnet > 10 ? 
security : 0); + //err = Vnet_add(vnet); + err = Vnet_create(vnet); + if(err) break; + } + dprintf("< err=%d\n", err); + return err; +} + +/** Initialize the vnet table and the physical vnet. + * + * @return 0 on success, error code otherwise + */ +int vnet_init(void){ + int err = 0; + + dprintf(">\n"); + vnet_table = HashTable_new(0); + dprintf("> vnet_table=%p\n", vnet_table); + if(!vnet_table){ + err = -ENOMEM; + goto exit; + } + vnet_table->entry_free_fn = vnet_entry_free_fn; + + err = Vnet_alloc(&vnet_physical); + if(err) goto exit; + vnet_physical->vnet = VNET_PHYS; + vnet_physical->security = 0; + err = Vnet_add(vnet_physical); + if(err) goto exit; + err = vnet_setup(); + if(err) goto exit; + err = varp_init(); + if(err) goto exit; + err = vif_init(); + exit: + if(err < 0) wprintf("< err=%d\n", err); + return err; +} + +void vnet_exit(void){ + vif_exit(); + varp_exit(); + HashTable_free(vnet_table); + vnet_table = NULL; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) + +static inline int skb_route(struct sk_buff *skb, struct rtable **prt){ + int err = 0; + struct flowi fl = { + .oif = skb->dev->ifindex, + .nl_u = { + .ip4_u = { + .daddr = skb->nh.iph->daddr, + .saddr = skb->nh.iph->saddr, + .tos = skb->nh.iph->tos, + } + } + }; + + err = ip_route_output_key(prt, &fl); + return err; +} + +#else + +static inline int skb_route(struct sk_buff *skb, struct rtable **prt){ + int err = 0; + struct rt_key key = { }; + key.dst = skb->nh.iph->daddr; + key.src = skb->nh.iph->saddr; + key.tos = skb->nh.iph->tos; + key.oif = skb->dev->ifindex; + err = ip_route_output_key(prt, &key); + return err; +} + +#endif + +inline int skb_xmit(struct sk_buff *skb){ + int err = 0; + struct rtable *rt = NULL; + + dprintf("> skb=%p dev=%s\n", skb, skb->dev->name); + + skb->protocol = htons(ETH_P_IP); + err = skb_route(skb, &rt); + if(err) goto exit; + skb->dst = &rt->u.dst; + + ip_select_ident(skb->nh.iph, &rt->u.dst, NULL); + + if(skb->nh.iph->saddr == 0){ + skb->nh.iph->saddr = 
rt->rt_src; + } + + skb->nh.iph->check = 0; + skb->nh.iph->check = ip_compute_csum(skb->nh.raw, (skb->nh.iph->ihl << 2)); + + err = neigh_compat_output(skb); + + exit: + dprintf("< err=%d\n", err); + return err; +} + +/** Called when a vif sends a packet to the network. + * Encapsulates the packet for its vnet and forwards it. + * + * @param skb packet + * @return 0 on success, error code otherwise + * + * @todo fixme + */ +int vnet_skb_send(struct sk_buff *skb, u32 vnet){ + int err = 0; + Vif *vif = NULL; + + dprintf("> skb=%p vnet=%u\n", skb, vnet); + if(vnet == VNET_PHYS || !vnet){ + // For completeness, send direct to the network. + if(skb->dev){ + err = skb_xmit(skb); + } else { + // Can't assume eth0 - might be nbe-br or other. Need to route. + struct net_device *dev = NULL; + err = vnet_get_device(DEVICE, &dev); + if(err) goto exit; + skb->dev = dev; + err = skb_xmit(skb); + dev_put(dev); + } + } else { + dprintf("> varp_output\n"); + err = varp_output(skb, vnet); + } + //dprintf("< err=%d\n", err); + exit: + if(vif) vif_decref(vif); + dprintf("< err=%d\n", err); + return err; +} + +/** Receive an skb for a vnet. + * If the dest is broadcast, goes to all vifs on the vnet. + * If the dest is unicast, goes to addressed vif on vnet. + * For each vif we set the packet dev and receive the packet. + * + * The packet must have skb->mac.raw set and skb->data must point + * after the device (ethernet) header. + * + * @param skb packet + * @param vnet packet vnet + * @param vmac packet vmac + * @return 0 on success, error code otherwise + */ +#if 1 +int vnet_skb_recv(struct sk_buff *skb, u32 vnet, Vmac *vmac){ + // Receive the skb for a vnet. + // We make the skb come out of the vif for the vnet, and + // let ethernet bridging forward it to related interfaces. 
+ int err = 0; + Vnet *info = NULL; + + dprintf("> vnet=%u mac=%s\n", vnet, mac_ntoa(vmac->mac)); + err = Vnet_lookup(vnet, &info); + if(err) goto exit; + skb->dev = info->dev; + dprintf("> netif_rx dev=%s\n", skb->dev->name); + netif_rx(skb); + exit: + if(info) Vnet_decref(info); + if(err){ + kfree_skb(skb); + } + dprintf("< err=%d\n", err); + return err; +} + +#else +int vnet_skb_recv(struct sk_buff *skb, u32 vnet, Vmac *vmac){ + int err = 0; + Vif *vif = NULL; + + dprintf("> vnet=%u mac=%s\n", vnet, mac_ntoa(vmac->mac)); + if(mac_is_multicast(vmac->mac)){ + HashTable_for_decl(entry); + int count = 0; + struct sk_buff *new_skb; + + HashTable_for_each(entry, vif_table){ + vif = entry->value; + if(vif->vnet != vnet) continue; + count++; + new_skb = skb_copy(skb, GFP_ATOMIC); + if(!new_skb) break; + new_skb->dev = vif->dev; + dprintf("> %d] netif_rx dev=%s\n", count, new_skb->dev->name); + netif_rx(new_skb); + } + kfree_skb(skb); + } else { + err = vif_lookup(vnet, vmac, &vif); + if(err){ + kfree_skb(skb); + goto exit; + } + skb->dev = vif->dev; + dprintf("> netif_rx dev=%s\n", skb->dev->name); + netif_rx(skb); + } + exit: + dprintf("< err=%d\n", err); + return err; +} +#endif + +/** Check validity of an incoming IP frame. + * + * @param skb frame + * @return 0 if ok, error code otherwise + * + * @todo fixme Can prob skip most of this because linux will have done it. + * @todo Only need the vnet skb context check. + */ +int check_ip_frame(struct sk_buff *skb){ + int err = -EINVAL; + struct iphdr* iph; + struct net_device *dev; + __u32 len; + __u16 check; + +#if 0 + if(skb->context){ + // Todo: After ESP want to skip most checks (including checksum), + // Todo: but in general may not want to skip all checks on detunnel. + //dprintf("> Skip check, has context\n"); + err = 0; + goto exit; + } +#endif + // Check we have enough for an ip header - the skb passed should + // have data pointing at the eth header and skb->len should include + // that. 
skb->nh should already have been set. Let the indvidual + // protocol handlers worry about the exact ip header len + // (i.e. whether any ip options are set). + dev = skb->dev; + + if(skb->len < ETH_HLEN + sizeof(struct iphdr)){ + wprintf("> packet too short for ip header\n"); + goto exit; + } + + iph = skb->nh.iph; + /* + * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. + * + * Is the datagram acceptable? + * + * 1. Length at least the size of an ip header + * 2. Version of 4 + * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] + * 4. Doesn't have a bogus length + */ + if (iph->ihl < 5 || iph->version != 4){ + wprintf("> len and version check failed\n"); + goto exit; + } + if(skb->len < ETH_HLEN + (iph->ihl << 2)){ + wprintf("> packet too short for given ihl\n"); + goto exit; + } + + check = iph->check; + //iph->check = 0; + //iph->check = compute_cksum((__u16 *)iph, (iph->ihl << 1)); + if(iph->check != check){ + wprintf("> invalid checksum\n"); + goto exit; + } + + len = ntohs(iph->tot_len); + if (skb->len < len + ETH_HLEN || len < (iph->ihl << 2)){ + wprintf("> packet too short for tot_len\n"); + goto exit; + } + skb->h.raw = skb->nh.raw + (iph->ihl << 2); + err = 0; + exit: + return err; +} + +/** Determine ESP security mode for a new SA. + * + * @param spi incoming spi + * @param protocol incoming protocol + * @param addr source address + * @return security level or negative error code + * + * @todo Need to check spi, and do some lookup for security params. + */ +int vnet_sa_security(u32 spi, int protocol, u32 addr){ + int security = vnet_security_default; + dprintf("< security=%x\n", security); + return security; +} + +/** Create a new SA for incoming traffic. 
+ * + * @param spi incoming spi + * @param protocol incoming protocol + * @param addr source address + * @param sa return parameter for SA + * @return 0 on success, error code otherwise + */ +int vnet_sa_create(u32 spi, int protocol, u32 addr, SAState **sa){ + int err = 0; + int security = vnet_sa_security(spi, protocol, addr); + if(security < 0){ + err = security; + goto exit; + } + err = sa_create(security, spi, protocol, addr, sa); + exit: + return err; +} + +/** Check that a context has the correct properties w.r.t. a vnet. + * The context must be secure if the vnet requires security. + * + * @param vnet vnet id + * @param context context + * @return 0 on success, error code otherwise + * + * @todo Need to check that the sa provides the correct security level. + */ +int vnet_check_context(int vnet, SkbContext *context, Vnet **val){ + int err = 0; + Vnet *info = NULL; + SAState *sa = NULL; + + err = Vnet_lookup(vnet, &info); + if(err){ + wprintf("> No vnet %d\n", vnet); + goto exit; + } + if(!info->security) goto exit; + err = -EINVAL; + if(!context){ + wprintf("> No security context\n"); + goto exit; + } + if(context->protocol != IPPROTO_ESP){ + wprintf("> Invalid protocol: wanted %d, got %d\n", IPPROTO_ESP, context->protocol); + goto exit; + } + sa = context->data; + //todo: Check security properties of the SA are correct w.r.t. the vnet. + //Something like sa->security == info->security; + err = 0; + exit: + *val = info; + return err; +} + +/** Open function for SA tunnels. + * + * @param tunnel to open + * @return 0 on success, error code otherwise + */ +static int sa_tunnel_open(Tunnel *tunnel){ + int err = 0; + //dprintf(">\n"); + //dprintf("< err=%d\n", err); + return err; +} + +/** Close function for SA tunnels. 
+ * + * @param tunnel to close (OK if null) + */ +static void sa_tunnel_close(Tunnel *tunnel){ + SAState *sa; + dprintf(">\n"); + if(!tunnel) return; + sa = tunnel->data; + if(!sa) return; + SAState_decref(sa); + tunnel->data = NULL; + dprintf("<\n"); +} + +/** Packet send function for SA tunnels. + * + * @param tunnel to send on + * @param skb packet to send + * @return 0 on success, negative error code on error + */ +static int sa_tunnel_send(Tunnel *tunnel, struct sk_buff *skb){ + int err = -EINVAL; + SAState *sa; + //dprintf("> tunnel=%p\n", tunnel); + if(!tunnel){ + wprintf("> Null tunnel!\n"); + goto exit; + } + sa = tunnel->data; + if(!sa){ + wprintf("> Null SA!\n"); + goto exit; + } + err = SAState_send(sa, skb, tunnel->base); + exit: + //dprintf("< err=%d\n", err); + return err; +} + +/** Functions used by SA tunnels. */ +static TunnelType _sa_tunnel_type = { + .name = "SA", + .open = sa_tunnel_open, + .close = sa_tunnel_close, + .send = sa_tunnel_send +}; + +/** Functions used by SA tunnels. */ +TunnelType *sa_tunnel_type = &_sa_tunnel_type; + +/** Open a tunnel for a vnet to a given address. 
+ * + * @param vnet vnet id + * @param addr destination address + * @param tunnel return parameter + * @return 0 on success, error code otherwise + */ +int vnet_tunnel_open(u32 vnet, u32 addr, Tunnel **tunnel){ + extern TunnelType *etherip_tunnel_type; + int err = 0; + Vnet *info = NULL; + Tunnel *base_tunnel = NULL; + Tunnel *sa_tunnel = NULL; + Tunnel *etherip_tunnel = NULL; + + dprintf("> vnet=%u addr=" IPFMT "\n", vnet, NIPQUAD(addr)); + err = Vnet_lookup(vnet, &info); + dprintf("> Vnet_lookup=%d\n", err); + if(err) goto exit; + if(info->security){ + SAState *sa = NULL; + dprintf("> security=%d\n", info->security); + err = Tunnel_create(sa_tunnel_type, vnet, addr, base_tunnel, &sa_tunnel); + if(err) goto exit; + dprintf("> sa_tunnel=%p\n", sa_tunnel); + err = sa_create(info->security, 0, IPPROTO_ESP, addr, &sa); + if(err) goto exit; + sa_tunnel->data = sa; + dprintf("> sa=%p\n", sa); + base_tunnel = sa_tunnel; + } + err = Tunnel_create(etherip_tunnel_type, vnet, addr, base_tunnel, ðerip_tunnel); + if(err) goto exit; + err = Tunnel_add(etherip_tunnel); + exit: + Tunnel_decref(sa_tunnel); + Vnet_decref(info); + if(err){ + *tunnel = NULL; + } else { + *tunnel = etherip_tunnel; + } + dprintf("< err=%d\n", err); + return err; +} + +/** Lookup a tunnel for a vnet to a given address. + * Uses an existing tunnel if there is one. + * + * @param vnet vnet id + * @param addr care-of address + * @param tunnel return parameter + * @return 0 on success, error code otherwise + */ +int vnet_tunnel_lookup(u32 vnet, u32 addr, Tunnel **tunnel){ + int err = 0; + dprintf("> vnet=%d addr=" IPFMT "\n", vnet, NIPQUAD(addr)); + *tunnel = Tunnel_lookup(vnet, addr); + if(!*tunnel){ + err = vnet_tunnel_open(vnet, addr, tunnel); + } + dprintf("< err=%d\n", err); + return err; +} + +/** Send a packet on the appropriate tunnel. 
+ * Looks up (or opens) the tunnel for the vnet and address, sends the
+ * packet on it and drops the tunnel reference obtained by the lookup.
+ *
+ * @param vnet vnet
+ * @param addr tunnel endpoint
+ * @param skb packet
+ * @return 0 on success, error code otherwise
+ */
+int vnet_tunnel_send(vnetid_t vnet, vnetaddr_t addr, struct sk_buff *skb){
+    int err = 0;
+    Tunnel *tunnel = NULL;
+    dprintf("> vnet=%u addr=" IPFMT "\n", vnet, NIPQUAD(addr));
+    err = vnet_tunnel_lookup(vnet, addr, &tunnel);
+    if(err) goto exit;
+    err = Tunnel_send(tunnel, skb);
+    Tunnel_decref(tunnel);
+  exit:
+    dprintf("< err=%d\n", err);
+    return err;
+}
+
+/** Shut down the vnet module.
+ * Tears the sub-modules down in the reverse of the order in which
+ * vnet_module_init() brings them up.
+ */
+static void __exit vnet_module_exit(void){
+    ProcFS_exit();
+    sa_table_exit();
+    vnet_exit();
+    esp_module_exit();
+    etherip_module_exit();
+    // Teardown must call the exit routine, not re-run tunnel_module_init().
+    tunnel_module_exit();
+    random_module_exit();
+}
+
+/** Initialize the vnet module.
+ * Failure is fatal.
+ *
+ * @return 0 on success, error code otherwise
+ */
+static int __init vnet_module_init(void){
+    int err = 0;
+
+    dprintf(">\n");
+    err = random_module_init();
+    if(err) wprintf("> random_module_init err=%d\n", err);
+    if(err) goto exit;
+    err = tunnel_module_init();
+    if(err) wprintf("> tunnel_module_init err=%d\n", err);
+    if(err) goto exit;
+    err = etherip_module_init();
+    if(err) wprintf("> etherip_module_init err=%d\n", err);
+    if(err) goto exit;
+    err = esp_module_init();
+    if(err) wprintf("> esp_module_init err=%d\n", err);
+    if(err) goto exit;
+    err = vnet_init();
+    if(err) wprintf("> vnet_init err=%d\n", err);
+    if(err) goto exit;
+    sa_algorithm_probe_all();
+    err = sa_table_init();
+    if(err) wprintf("> sa_table_init err=%d\n", err);
+    ProcFS_init();
+  exit:
+    // NOTE(review): on a negative error the full exit routine runs, including
+    // the exit functions of sub-modules that were never initialized - confirm
+    // those are safe to call in that state.
+    if(err < 0){
+        vnet_module_exit();
+    }
+    if(err < 0) wprintf("< err=%d\n", err);
+    return err;
+}
+
+module_init(vnet_module_init);
+module_exit(vnet_module_exit);
+MODULE_LICENSE("GPL");
diff --git a/tools/vnet/vnet-module/vnet.h b/tools/vnet/vnet-module/vnet.h
new file mode 100644
index 0000000000..3cee13bbd7
--- /dev/null
+++ b/tools/vnet/vnet-module/vnet.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2004 Mike Wray
+ *
+ * This program is free software; you can 
redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef __VNET_VNET_H__ +#define __VNET_VNET_H__ + +#include +#include + +#include +#include + +struct Vmac; +struct Vif; +struct net_device; + +typedef uint32_t vnetid_t; +typedef uint32_t vnetaddr_t; + +/** Vnet property record. */ +typedef struct Vnet { + /** Reference count. */ + atomic_t refcount; + /** Vnet id. */ + vnetid_t vnet; + /** Security flag. If true the vnet requires ESP. */ + int security; + + struct net_device *dev; + struct net_device *bridge; + + /** Max size of the header. */ + int header_n; + /** Statistics. 
*/ + struct net_device_stats stats; + int recursion; +} Vnet; + +extern int Vnet_lookup(vnetid_t id, Vnet **vnet); +extern int Vnet_add(Vnet *vnet); +extern int Vnet_del(vnetid_t vnet); +extern void Vnet_incref(Vnet *); +extern void Vnet_decref(Vnet *); +extern int Vnet_alloc(Vnet **vnet); +extern Vnet *vnet_physical; + +extern int skb_xmit(struct sk_buff *skb); +extern int vnet_skb_send(struct sk_buff *skb, u32 vnet); +extern int vnet_skb_recv(struct sk_buff *skb, u32 vnet, struct Vmac *vmac); + +extern int vnet_check_context(int vnet, SkbContext *context, Vnet **vinfo); + +extern int vnet_tunnel_open(vnetid_t vnet, vnetaddr_t addr, Tunnel **tunnel); +extern int vnet_tunnel_lookup(vnetid_t vnet, vnetaddr_t addr, Tunnel **tunnel); +extern int vnet_tunnel_send(vnetid_t vnet, vnetaddr_t addr, struct sk_buff *skb); + +extern int vnet_init(void); + +enum { + HANDLE_OK = 1, + HANDLE_NO = 0, +}; + +extern int vnet_sa_security(u32 spi, int protocol, u32 addr); +struct SAState; +extern int vnet_sa_create(u32 spi, int protocol, u32 addr, struct SAState **sa); + +enum { + VNET_PHYS = 1, + VNET_VIF = 2, +}; + +#endif /* !__VNET_VNET_H__ */ diff --git a/tools/vnet/vnet-module/vnet_dev.c b/tools/vnet/vnet-module/vnet_dev.c new file mode 100644 index 0000000000..3836606b4f --- /dev/null +++ b/tools/vnet/vnet-module/vnet_dev.c @@ -0,0 +1,534 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define MODULE_NAME "VNET" +#define DEBUG 1 +#undef DEBUG +#include "debug.h" + +#define VNETIF_FMT "vnetif%u" +#define VNETBR_FMT "vnet%u" + +#ifndef CONFIG_BRIDGE +#error Must configure ethernet bridging in Network Options +#endif + +#include +#define dev_bridge(_dev) ((struct net_bridge *)(_dev)->priv) + +static void vnet_dev_destructor(struct net_device *dev){ + dprintf(">\n"); + dev->open = NULL; + dev->stop = NULL; + dev->uninit = NULL; + dev->destructor = NULL; + dev->hard_start_xmit = NULL; + dev->get_stats = NULL; + dev->do_ioctl = NULL; + dev->change_mtu = NULL; + + dev->tx_timeout = NULL; + dev->set_multicast_list = NULL; + dev->flags = 0; + + dev->priv = NULL; +} + +static void vnet_dev_uninit(struct net_device *dev){ + //Vnet *vnet = dev->priv; + dprintf(">\n"); + //dev_put(dev); + dprintf("<\n"); +} + +static struct net_device_stats *vnet_dev_get_stats(struct net_device *dev){ + Vnet *vnet = dev->priv; + //dprintf(">\n"); + return &vnet->stats; +} + +static int vnet_dev_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd){ + int err = 0; + + dprintf(">\n"); + return err; +} + +static int vnet_dev_change_mtu(struct net_device *dev, int mtu){ + int err = 0; + Vnet *vnet = dev->priv; + if (mtu < 68 || mtu > 1500 - vnet->header_n){ + err = -EINVAL; + goto exit; + } + dev->mtu = mtu; + exit: + return err; +} + +static int vnet_dev_set_name(struct net_device *dev){ + int err = 0; + Vnet *vnet = (void*)dev->priv; + + dprintf(">\n"); + dprintf("> vnet=%d\n", vnet->vnet); + snprintf(dev->name, IFNAMSIZ - 1, 
VNETIF_FMT, vnet->vnet); + if(__dev_get_by_name(dev->name)){ + err = -ENOMEM; + } + dprintf("< err=%d\n", err); + return err; +} + +//============================================================================ +#ifdef CONFIG_VNET_BRIDGE + +#define BRIDGE DEVICE + +void vnet_bridge_fini(Vnet *vnet){ + if(!vnet) return; + if(vnet->bridge){ + br_del_bridge(vnet->bridge->name); + vnet->bridge = NULL; + } +} + +/** Create the bridge for a vnet, and add the + * vnet interface to it. + * + * @param vnet vnet + * @return 0 on success, error code otherwise + */ +int vnet_bridge_init(Vnet *vnet){ + int err = 0; + char bridge[IFNAMSIZ] = {}; + struct net_bridge *br; + vnet->bridge = NULL; + snprintf(bridge, IFNAMSIZ - 1, VNETBR_FMT, vnet->vnet); + rtnl_lock(); + err = br_add_bridge(bridge); + rtnl_unlock(); + if(err){ + dprintf("> Error creating vnet bridge %s: err=%d\n", bridge, err); + goto exit; + } + vnet->bridge = __dev_get_by_name(bridge); + if(!vnet->bridge){ + wprintf("> Vnet bridge %s is null!\n", bridge); + err = -EINVAL; + goto exit; + } + br = dev_bridge(vnet->bridge); + br->stp_enabled = 0; + br->bridge_hello_time = 0; + br->hello_time = 0; + br->bridge_forward_delay = 0; + br->forward_delay = 0; + rtnl_lock(); + err = br_add_if(br, vnet->dev); + rtnl_unlock(); + if(err){ + dprintf("> Error adding vif %s to vnet bridge %s: err=%d\n", + vnet->dev->name, bridge, err); + goto exit; + } + rtnl_lock(); + dev_open(vnet->dev); + dev_open(vnet->bridge); + rtnl_unlock(); + exit: + if(err){ + if(vnet->bridge){ + rtnl_lock(); + br_del_bridge(bridge); + rtnl_unlock(); + vnet->bridge = NULL; + } + } + return err; +} + + +/** Add an interface to the bridge for a vnet. 
+ * + * @param vnet vnet + * @param dev interface + * @return 0 on success, error code otherwise + */ +int vnet_add_if(Vnet *vnet, struct net_device *dev){ + int err = 0; + struct net_device *brdev; + + dprintf(">\n"); + if(!vnet->bridge){ + err = -EINVAL; + goto exit; + } + // Delete the interface from the default bridge. + // todo: Really want to delete it from any bridge it's in. + if(!vnet_get_device(BRIDGE, &brdev)){ + rtnl_lock(); + br_del_if(dev_bridge(brdev), dev); + rtnl_unlock(); + } + dprintf("> br_add_if %s %s\n", vnet->bridge->name, dev->name); + rtnl_lock(); + dev_open(dev); + dev_open(vnet->bridge); + err = br_add_if(dev_bridge(vnet->bridge), dev); + rtnl_unlock(); + exit: + dprintf("< err=%d\n", err); + return err; +} + +int vnet_del_if(Vnet *vnet, struct net_device *dev){ + int err = 0; + + dprintf(">\n"); + if(!vnet->bridge){ + err = -EINVAL; + goto exit; + } + rtnl_lock(); + br_del_if(dev_bridge(vnet->bridge), dev); + rtnl_unlock(); + exit: + dprintf("< err=%d\n", err); + return err; +} + + +/** Create the bridge and virtual interface for a vnet. + * + * @param info vnet + * @return 0 on success, error code otherwise + */ +int Vnet_create(Vnet *info){ + int err = 0; + + dprintf("> %u\n", info->vnet); + err = vnet_dev_add(info); + if(err) goto exit; + dprintf("> vnet_bridge_init\n"); + err = vnet_bridge_init(info); + if(err) goto exit; + dprintf("> Vnet_add...\n"); + err = Vnet_add(info); + exit: + if(err){ + dprintf("> vnet_bridge_fini...\n"); + vnet_bridge_fini(info); + } + dprintf("< err=%d\n", err); + return err; +} + + + +/** Remove the net device for a vnet. + * Clears the dev field of the vnet. + * Safe to call if the vnet or its dev are null. 
+ * + * @param vnet vnet + */ +void vnet_dev_remove(Vnet *vnet){ + if(!vnet) return; + dprintf("> vnet=%u\n", vnet->vnet); + if(vnet->bridge){ + dprintf("> br_del_bridge(%s)\n", vnet->bridge->name); + rtnl_lock(); + br_del_bridge(vnet->bridge->name); + rtnl_unlock(); + vnet->bridge = NULL; + } + if(vnet->dev){ + //dev_put(vnet->dev); + dprintf("> unregister_netdev(%s)\n", vnet->dev->name); + unregister_netdev(vnet->dev); + vnet->dev = NULL; + } + dprintf("<\n"); +} + +//============================================================================ +#else +//============================================================================ + +/** Create the virtual interface for a vnet. + * + * @param info vnet + * @return 0 on success, error code otherwise + */ +int Vnet_create(Vnet *info){ + int err = 0; + + dprintf("> %u\n", info->vnet); + err = vnet_dev_add(info); + if(err) goto exit; + dprintf("> Vnet_add...\n"); + err = Vnet_add(info); + exit: + dprintf("< err=%d\n", err); + return err; +} + +int vnet_add_if(Vnet *vnet, struct net_device *dev){ + int err = -ENOSYS; + return err; +} + + +int vnet_del_if(Vnet *vnet, struct net_device *dev){ + int err = 0; + return err; +} + +/** Remove the net device for a vnet. + * Clears the dev field of the vnet. + * Safe to call if the vnet or its dev are null. 
+ * + * @param vnet vnet + */ +void vnet_dev_remove(Vnet *vnet){ + if(!vnet) return; + dprintf("> vnet=%u\n", vnet->vnet); + if(vnet->dev){ + //dev_put(vnet->dev); + dprintf("> unregister_netdev(%s)\n", vnet->dev->name); + unregister_netdev(vnet->dev); + vnet->dev = NULL; + } + dprintf("<\n"); +} +#endif +//============================================================================ + +static int vnet_dev_open(struct net_device *dev){ + int err = 0; + dprintf(">\n"); + netif_start_queue(dev); + dprintf("<\n"); + return err; +} + +static int vnet_dev_stop(struct net_device *dev){ + int err = 0; + dprintf(">\n"); + netif_stop_queue(dev); + dprintf("<\n"); + return err; +} + +static int vnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev){ + int err = 0; + Vnet *vnet = dev->priv; + + dprintf("> skb=%p\n", skb); + if(vnet->recursion++) { + vnet->stats.collisions++; + vnet->stats.tx_errors++; + wprintf("> recursion!\n"); + dev_kfree_skb(skb); + goto exit; + } + if(!skb){ + err = -EINVAL; + wprintf("> skb NULL!\n"); + goto exit; + } + dprintf("> skb->data=%p skb->mac.raw=%p\n", skb->data, skb->mac.raw); + if(skb->mac.raw < skb->data || skb->mac.raw > skb->nh.raw){ + wprintf("> skb mac duff!\n"); + skb->mac.raw = skb->data; + } + //dev->trans_start = jiffies; + err = vnet_skb_send(skb, vnet->vnet); + if(err < 0){ + vnet->stats.tx_errors++; + } else { + vnet->stats.tx_packets++; + vnet->stats.tx_bytes += skb->len; + } + exit: + vnet->recursion--; + dprintf("<\n"); + return 0; +} + +void vnet_dev_tx_timeout(struct net_device *dev){ + dprintf(">\n"); + //dev->trans_start = jiffies; + //netif_wake_queue(dev); +} + +void vnet_dev_set_multicast_list(struct net_device *dev){ + dprintf(">\n"); +} + +static int (*eth_hard_header)(struct sk_buff *skb, + struct net_device *dev, unsigned short type, + void *daddr, void *saddr, unsigned len) = NULL; + +static int vnet_dev_hard_header(struct sk_buff *skb, + struct net_device *dev, unsigned short type, + void *daddr, 
void *saddr, unsigned len){ + int err = 0; + dprintf("> skb=%p ethhdr=%p dev=%s len=%u\n", + skb, skb->mac.raw, dev->name, len); + if(saddr){ + dprintf("> saddr=" MACFMT "\n", MAC6TUPLE((unsigned char*)saddr)); + } else { + dprintf("> saddr=NULL\n"); + } + if(daddr){ + dprintf("> daddr=" MACFMT "\n", MAC6TUPLE((unsigned char*)daddr)); + } else { + dprintf("> daddr=NULL\n"); + } + err = eth_hard_header(skb, dev, type, daddr, saddr, len); + dprintf("> eth_hard_header=%d\n", err); + skb->mac.raw = skb->data; + dprintf("> src=" MACFMT " dst=" MACFMT "\n", + MAC6TUPLE(skb->mac.ethernet->h_source), + MAC6TUPLE(skb->mac.ethernet->h_dest)); + dprintf("< err=%d\n", err); + return err; +} + +void vnet_dev_mac(unsigned char *mac){ + static unsigned val = 1; + struct net_device *dev; + + if(vnet_get_device(DEVICE, &dev)){ + mac[0] = 0xAA; + mac[1] = 0xFF; + mac[2] = (unsigned char)((val >> 24) & 0xff); + mac[3] = (unsigned char)((val >> 16) & 0xff); + mac[4] = (unsigned char)((val >> 8) & 0xff); + mac[5] = (unsigned char)((val ) & 0xff); + val++; + } else { + memcpy(mac, dev->dev_addr, ETH_ALEN); + dev_put(dev); + } +} + +static int vnet_dev_init(struct net_device *dev){ + int err = 0; + Vnet *vnet = (void*)dev->priv; + + dprintf(">\n"); + ether_setup(dev); + + if(!eth_hard_header) eth_hard_header = dev->hard_header; + dev->hard_header = vnet_dev_hard_header; + + dev->open = vnet_dev_open; + dev->stop = vnet_dev_stop; + dev->uninit = vnet_dev_uninit; + dev->destructor = vnet_dev_destructor; + dev->hard_start_xmit = vnet_dev_hard_start_xmit; + dev->get_stats = vnet_dev_get_stats; + dev->do_ioctl = vnet_dev_do_ioctl; + dev->change_mtu = vnet_dev_change_mtu; + + dev->tx_timeout = vnet_dev_tx_timeout; + dev->watchdog_timeo = TX_TIMEOUT; + dev->set_multicast_list = vnet_dev_set_multicast_list; + + dev->hard_header_len += vnet->header_n; + dev->mtu -= vnet->header_n; + + vnet_dev_mac(dev->dev_addr); + + dev->flags |= IFF_DEBUG; + dev->flags |= IFF_PROMISC; + dev->flags |= 
IFF_ALLMULTI; + + dprintf("<\n"); + return err; +} + +/** Add the interface (net device) for a vnet. + * Sets the dev field of the vnet on success. + * Does nothing if the vif already has an interface. + * + * @param vif vif + * @return 0 on success, error code otherwise + */ +int vnet_dev_add(Vnet *vnet){ + int err = 0; + struct net_device *dev = NULL; + + dprintf("> vnet=%p\n", vnet); + if(vnet->dev) goto exit; + vnet->header_n = sizeof(struct iphdr) + sizeof(struct etheriphdr); + dev = kmalloc(sizeof(struct net_device), GFP_ATOMIC); + if(!dev){ err = -ENOMEM; goto exit; } + *dev = (struct net_device){}; + dev->priv = vnet; + vnet->dev = dev; + + err = vnet_dev_set_name(dev); + if(err) goto exit; + vnet_dev_init(dev); + dprintf("> name=%s, register_netdev...\n", dev->name); + err = register_netdev(dev); + dprintf("> register_netdev=%d\n", err); + if(err) goto exit; + rtnl_lock(); + dev_open(dev); + rtnl_unlock(); + + //dev_hold(dev); + exit: + if(err){ + if(dev) kfree(dev); + vnet->dev = NULL; + } + dprintf("< err=%d\n", err); + return err; +} diff --git a/tools/vnet/vnet-module/vnet_dev.h b/tools/vnet/vnet-module/vnet_dev.h new file mode 100644 index 0000000000..168f50c91b --- /dev/null +++ b/tools/vnet/vnet-module/vnet_dev.h @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _VNET_VNET_DEV_H_ +#define _VNET_VNET_DEV_H_ + +struct Vnet; +struct net_device; + +extern int vnet_dev_add(struct Vnet *vnet); +extern void vnet_dev_remove(struct Vnet *vnet); +extern int Vnet_create(struct Vnet *info); +extern int vnet_add_if(struct Vnet *vnet, struct net_device *dev); +extern int vnet_del_if(struct Vnet *vnet, struct net_device *dev); + +#endif diff --git a/tools/vnet/vnet-module/vnet_ioctl.c b/tools/vnet/vnet-module/vnet_ioctl.c new file mode 100644 index 0000000000..5f9f16b2e6 --- /dev/null +++ b/tools/vnet/vnet-module/vnet_ioctl.c @@ -0,0 +1,815 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include + +#include +#include +#include + +#include + +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include "vif.h" +#include "vnet.h" +#include "varp.h" +#include "vnet_dev.h" + +#include "sxpr_parser.h" +#include "iostream.h" +#include "kernel_stream.h" +#include "sys_string.h" +#include "sys_net.h" + +#define MODULE_NAME "VNET" +#define DEBUG 1 +#undef DEBUG +#include "debug.h" + +// Functions to manage vnets. +/* + +Have to rely on ethernet bridging being configured - but we can't rely +on the kernel interface being available to us (it's not exported @!$"%!). + +Create a vnet N: +- create the vnet device vnetifN: using commands to /proc, kernel api +- create the vnet bridge vnetN: using brctl in user-space +- for best results something should keep track of the mapping vnet id <-> bridge name + +Add vif device vifD.N to vnet N. +- domain is configured with vifD.N on bridge vnetN +- vif script adds vif to bridge using brctl +- vif script detects that the bridge is a vnet bridge and + uses /proc commands to configure the mac on the vnet + +Wouldn't be hard to add support for specifying vnet keys(s) in +the control interface. + +*/ + + // id vnet id + // security security level + // ciphersuite: digest, cipher, keys?? +/* Security policy. + vnet + src: mac + dst: mac + coa: ip + Map vnet x coa -> security (none, auth, conf) + + Policy, e.g. 
+ - same subnet x vnet + - diff subnet x vnet + - some subnet x vnet + - some host addr x vnet + + (security (net local) (vnet *) (mode none)) + (security (net (not local)) + + (security (addr, vnet) (local-subnet addr) none) + (security (addr, vnet) (not (local-subnet addr)) conf) + (security (addr, vnet) (host 15.144.27.80) + (security (addr, vnet) (subnet addr 15.144.24.0/24) auth) + (security (addr, vnet) t auth) + + (security (addr local) (mode none)) + (security (addr local/16) (mode none)) + (security (addr 15.144.0.0/16) (mode auth)) + (security (addr 15.0.0.0/8) (mode conf)) + (security (addr *) (mode drop)) + + ?Varp security + Use esp too - none, auth, conf, + Varp sends broadcasts (requests) and unicasts (replies). + Uses UDP. Could send over ESP if needed. + For bcast don't know where it goes, so security has to be by vnet. + For ucast know where it goes, so could do by vnet and addr. + + Similar issue for vnets: know where unicast goes but don't know where + bcast goes. + + Simplify: 2 levels + local ucast + nonlocal ucast, mcast + + (security (local none) (nonlocal conf)) + (security (local auth) (nonlocal conf)) + + VARP security matches vnet security. + + */ + +/** @file + * + * Kernel interface to files in /proc. 
+ */
+
+#define PROC_ROOT "/proc/"
+#define PROC_ROOT_LEN 6
+#define MODULE_ROOT PROC_ROOT "vnet"
+
+enum {
+    VNET_POLICY = 1,
+};
+
+typedef struct proc_dir_entry ProcEntry;
+typedef struct inode Inode;
+typedef struct file File;
+
+static int proc_open_fn(struct inode *inode, File *file);
+static ssize_t proc_read_fn(File *file, char *buffer, size_t count, loff_t *offset);
+static ssize_t proc_write_fn(File *file, const char *buffer, size_t count, loff_t *offset) ;
+//static int proc_flush_fn(File *file);
+static loff_t proc_lseek_fn(File * file, loff_t offset, int orig);
+static int proc_ioctl_fn(struct inode *inode, File *file, unsigned opcode, unsigned long arg);
+static int proc_release_fn(struct inode *inode, File *file);
+
+static int eval(Sxpr exp);
+
+/** Test whether a proc entry has the given name.
+ *
+ * @param entry proc entry (may be null)
+ * @param name name to compare (need not be null-terminated)
+ * @param namelen length of name
+ * @return TRUE if the entry is non-null, has a non-zero low_ino and its
+ *         name matches exactly, FALSE otherwise
+ */
+static int ProcEntry_has_name(ProcEntry *entry, const char *name, int namelen){
+    // Validate the entry before the debug print dereferences it.
+    if(!entry || !entry->low_ino) return FALSE;
+    dprintf("> name=%.*s entry=%.*s\n", namelen, name, entry->namelen, entry->name);
+    if(entry->namelen != namelen) return FALSE;
+    return memcmp(name, entry->name, namelen) == 0;
+}
+
+// Set f->f_error on error?
+// Does interface stop r/w on first error?
+// Is release called after an error?
+//
+
+/** File operations installed on the vnet /proc entries. */
+static struct file_operations proc_file_ops = {
+    //owner: THIS_MODULE,
+    open: proc_open_fn,
+    read: proc_read_fn,
+    write: proc_write_fn,
+    //flush: proc_flush_fn,
+    llseek: proc_lseek_fn,
+    ioctl: proc_ioctl_fn,
+    release: proc_release_fn,
+};
+
+/** Get the parser attached to an open file, creating it on first use.
+ * The parser is kept in file->private_data.
+ *
+ * @param file open file
+ * @param val return parameter for the parser (NULL if allocation failed)
+ * @return 0 on success, -ENOMEM if a new parser could not be allocated
+ */
+static int proc_get_parser(File *file, Parser **val){
+    int err = 0;
+    Parser *parser = NULL;
+    parser = file->private_data;
+    if(!parser){
+        parser = Parser_new();
+        if(!parser){
+            err = -ENOMEM;
+            goto exit;
+        }
+        file->private_data = parser;
+    }
+  exit:
+    *val = parser;
+    return err;
+}
+
+static int proc_open_fn(Inode *inode, File *file){
+    // User open.
+    // Return errcode or 0 on success.
+    // Can stuff data in file->private_data (void*). 
+    // Get entry from
+    //ProcEntry *entry = (ProcEntry *)inode->u.generic_ip;
+    //file->private_data = NULL;
+    // Check for user privilege - deny otherwise.
+    // -EACCESS
+    int err = 0;
+    dprintf(">\n");
+    file->private_data = NULL;
+    return err;
+}
+
+/** Read handler for the vnet /proc entries.
+ * Nothing is copied to the user buffer; always returns 0.
+ */
+static ssize_t proc_read_fn(File *file, char *buffer,
+                            size_t count, loff_t *offset){
+    // User read.
+    // Copy data to user buffer, increment offset by count, return count.
+    dprintf(">\n");
+    count = 0;
+    //if(copy_to_user(buffer, data, count)){
+    //    return -EFAULT;
+    //}
+    //*offset += count;
+    return count;
+}
+
+/** Write handler for the vnet /proc entries.
+ * Copies the user data into a kernel buffer and feeds it to the parser
+ * attached to the file (created on first write).
+ *
+ * @param file open file
+ * @param buffer user-space data
+ * @param count number of bytes in buffer
+ * @param offset file offset, advanced by count once the data is copied in
+ * @return count on success, negative error code otherwise
+ */
+static ssize_t proc_write_fn(File *file, const char *buffer,
+                             size_t count, loff_t *offset) {
+    // User write.
+    // Copy data into kernel space from buffer.
+    // Increment offset by count, return count (or code).
+    int err = 0;
+    char *data = NULL;
+    Parser *parser = NULL;
+
+    //dprintf("> count=%d\n", count);
+    err = proc_get_parser(file, &parser);
+    if(err) goto exit;
+    data = allocate(count);
+    if(!data){
+        err = -ENOMEM;
+        goto exit;
+    }
+    // copy_from_user() returns the number of bytes it could NOT copy,
+    // which is positive on a fault, so map any failure to -EFAULT.
+    if(copy_from_user(data, buffer, count)){
+        err = -EFAULT;
+        goto exit;
+    }
+    *offset += count;
+    err = Parser_input(parser, data, count);
+  exit:
+    deallocate(data);
+    //dprintf("< err = %d\n", err);
+    return (err < 0 ? (ssize_t)err : (ssize_t)count);
+}
+
+#if 0
+static int proc_flush_fn(File *file){
+    // User flush.
+    int writing = (file->f_flags & O_ACCMODE) == O_WRONLY;
+    int f_count = atomic_read(&file->f_count);
+    if (writing && f_count == 1) {
+        ProcEntry *pentry = (ProcEntry *)file->f_dentry->d_inode->u.generic_ip;
+        // ...
+    }
+    return retval;
+}
+#endif
+
+#ifndef SEEK_SET
+enum {
+    /** Offset from start. */
+    SEEK_SET = 0,
+    /** Offset from current position. */
+    SEEK_CUR = 1,
+    /** Offset from size of file. */
+    SEEK_END = 2
+};
+#endif /* !SEEK_SET */
+
+static loff_t proc_lseek_fn(File * file, loff_t offset, int from){
+    // User lseek. 
+ dprintf(">\n"); + switch(from){ + case SEEK_SET: + break; + case SEEK_CUR: + offset += file->f_pos; + break; + case SEEK_END: + return -EINVAL; + default: + return -EINVAL; + } + if(offset < 0) return -EINVAL; + file->f_pos = offset; + return offset; +} + +static int proc_ioctl_fn(Inode *inode, File *file, + unsigned opcode, unsigned long arg){ + // User ioctl. + dprintf(">\n"); + return 0; +} + +static int proc_release_fn(Inode *inode, File *file){ + // User close. + // Cleanup file->private_data, return errcode. + int err = 0; + Parser *parser = NULL; + Sxpr obj, l; + + dprintf(">\n"); + err = proc_get_parser(file, &parser); + if(err) goto exit; + err = Parser_input(parser, NULL, 0); + if(err) goto exit; + obj = parser->val; + objprint(iostdout, obj, 0); IOStream_print(iostdout, "\n"); + for(l = obj; CONSP(l); l = CDR(l)){ + err = eval(CAR(l)); + if(err) break; + } + exit: + Parser_free(parser); + file->private_data = NULL; + dprintf("< err=%d\n", err); + return err; +} + +static ProcEntry *proc_fs_root = &proc_root; + +static int proc_path_init(const char *path, const char **rest){ + int err = 0; + + if(!path){ + err = -EINVAL; + goto exit; + } + if(*path == '/'){ + if(strncmp(PROC_ROOT, path, PROC_ROOT_LEN)){ + err = -EINVAL; + } else { + path += PROC_ROOT_LEN; + } + } + exit: + *rest = path; + return err; +} + + +/** Parse a path relative to `dir'. If dir is null or the proc root + * the path is relative to "/proc/", and the leading "/proc/" may be + * supplied. + * + */ +static ProcEntry * ProcFS_lookup(const char *path, ProcEntry *dir){ + const char *pathptr = path, *next = NULL; + ProcEntry *entry, *result = NULL; + int pathlen; + + if(dir && (dir != proc_fs_root)){ + entry = dir; + } else { + if(proc_path_init(path, &pathptr)) goto exit; + entry = proc_fs_root; + } + if(!pathptr || !*pathptr) goto exit; + while(1){ + next = strchr(pathptr, '/'); + pathlen = (next ? 
next - pathptr : strlen(pathptr)); + for(entry = entry->subdir; entry ; entry = entry->next) { + if(ProcEntry_has_name(entry, pathptr, pathlen)) break; + } + if (!entry) break; + if(!next){ + result = entry; + break; + } + pathptr = next + 1; + } + exit: + return result; +} + +static ProcEntry *ProcFS_register(const char *name, ProcEntry *dir, int val){ + mode_t mode = 0; + ProcEntry *entry; + + entry = create_proc_entry(name, mode, dir); + if(entry){ + entry->proc_fops = &proc_file_ops; + entry->data = (void*)val; // Whatever data we need. + } + return entry; +} + +static ProcEntry *ProcFS_mkdir(const char *name, ProcEntry *parent){ + ProcEntry *entry = NULL; + entry = ProcFS_lookup(name, parent); + if(!entry){ + const char *path; + if(proc_path_init(name, &path)) goto exit; + entry = proc_mkdir(path, parent); + } + exit: + return entry; +} + +static void ProcFS_remove(const char *name, ProcEntry *parent){ + remove_proc_entry(name, parent); +} + +static void ProcFS_rmrec_entry(ProcEntry *entry){ + if(entry){ + // Don't want to remove /proc itself! 
+ if(entry->parent == entry) return; + while(entry->subdir){ + ProcFS_rmrec_entry(entry->subdir); + } + dprintf("> remove %s\n", entry->name); + ProcFS_remove(entry->name, entry->parent); + } +} + +static void ProcFS_rmrec(const char *name, ProcEntry *parent){ + ProcEntry *entry; + + dprintf("> name=%s\n", name); + entry = ProcFS_lookup(name, parent); + if(entry){ + ProcFS_rmrec_entry(entry); + } + dprintf("<\n"); +} + +static int stringof(Sxpr exp, char **s){ + int err = 0; + if(ATOMP(exp)){ + *s = atom_name(exp); + } else if(STRINGP(exp)){ + *s = string_string(exp); + } else { + err = -EINVAL; + *s = NULL; + } + return err; +} + +static int child_string(Sxpr exp, Sxpr key, char **s){ + int err = 0; + Sxpr val = sxpr_child_value(exp, key, ONONE); + err = stringof(val, s); + return err; +} + +static int intof(Sxpr exp, int *v){ + int err = 0; + char *s; + unsigned long l; + if(INTP(exp)){ + *v = OBJ_INT(exp); + } else { + err = stringof(exp, &s); + if(err) goto exit; + err = convert_atoul(s, &l); + *v = (int)l; + } + exit: + return err; +} + +static int child_int(Sxpr exp, Sxpr key, int *v){ + int err = 0; + Sxpr val = sxpr_child_value(exp, key, ONONE); + err = intof(val, v); + return err; +} + +static int macof(Sxpr exp, unsigned char *v){ + int err = 0; + char *s; + err = stringof(exp, &s); + if(err) goto exit; + err = mac_aton(s, v); + exit: + return err; +} + +static int child_mac(Sxpr exp, Sxpr key, unsigned char *v){ + int err = 0; + Sxpr val = sxpr_child_value(exp, key, ONONE); + err = macof(val, v); + return err; +} + +static int addrof(Sxpr exp, uint32_t *v){ + int err = 0; + char *s; + unsigned long w; + err = stringof(exp, &s); + if(err) goto exit; + err = get_inet_addr(s, &w); + if(err) goto exit; + *v = (uint32_t)w; + exit: + return err; +} + +static int child_addr(Sxpr exp, Sxpr key, uint32_t *v){ + int err = 0; + Sxpr val = sxpr_child_value(exp, key, ONONE); + err = addrof(val, v); + return err; +} + +/** Create a vnet. 
+ * It is an error if a vnet with the same id exists. + * + * @param vnet vnet id + * @param security security level + * @return 0 on success, error code otherwise + */ +static int ctrl_vnet_add(int vnet, int security){ + int err = 0; + Vnet *vnetinfo = NULL; + if(Vnet_lookup(vnet, &vnetinfo) == 0){ + err = -EEXIST; + goto exit; + } + err = Vnet_alloc(&vnetinfo); + if(err) goto exit; + vnetinfo->vnet = vnet; + vnetinfo->security = security; + err = Vnet_create(vnetinfo); + exit: + if(vnetinfo) Vnet_decref(vnetinfo); + return err; +} + +/** Delete a vnet. + * + * @param vnet vnet id + * @return 0 on success, error code otherwise + */ +static int ctrl_vnet_del(int vnet){ + int err = -ENOSYS; + // Can't delete if there are any vifs on the vnet. + //Vnet_del(vnet); + return err; +} + +/** Create an entry for a vif with the given vnet and vmac. + * + * @param vnet vnet id + * @param vmac mac address + * @return 0 on success, error code otherwise + */ +static int ctrl_vif_add(int vnet, Vmac *vmac){ + int err = 0; + Vnet *vnetinfo = NULL; + Vif *vif = NULL; + + dprintf(">\n"); + err = Vnet_lookup(vnet, &vnetinfo); + if(err) goto exit; + err = vif_add(vnet, vmac, &vif); + exit: + if(vnetinfo) Vnet_decref(vnetinfo); + if(vif) vif_decref(vif); + dprintf("< err=%d\n", err); + return err; +} + +/** Add net device 'vifname' to the bridge for 'vnet' and + * create an entry for a vif with the given vnet and vmac. + * This is used when device 'vifname' is a virtual device + * connected to a vif in a vm. 
+ * + * @param vifname name of device to bridge + * @param vnet vnet id + * @param vmac mac address + * @return 0 on success, error code otherwise + */ +static int ctrl_vif_conn(char *vifname, int vnet, Vmac *vmac){ + int err = 0; + Vnet *vnetinfo = NULL; + struct net_device *vifdev = NULL; + Vif *vif = NULL; + + dprintf("> %s\n", vifname); + err = Vnet_lookup(vnet, &vnetinfo); + if(err) goto exit; + err = vif_add(vnet, vmac, &vif); + if(err) goto exit; + err = vnet_get_device(vifname, &vifdev); + if(err) goto exit; + vif->dev = vifdev; + err = vnet_add_if(vnetinfo, vifdev); + exit: + if(vnetinfo) Vnet_decref(vnetinfo); + if(vif) vif_decref(vif); + if(vifdev) dev_put(vifdev); + dprintf("< err=%d\n", err); + return err; +} + +/** Delete a vif. + * + * @param vnet vnet id + * @param vmac mac address + * @return 0 on success, error code otherwise + */ +static int ctrl_vif_del(int vnet, Vmac *vmac){ + int err = 0; + Vnet *vnetinfo = NULL; + Vif *vif = NULL; + + dprintf(">\n"); + err = Vnet_lookup(vnet, &vnetinfo); + if(err) goto exit; + err = vif_lookup(vnet, vmac, &vif); + if(err) goto exit; + if(vif->dev){ + vnet_del_if(vnetinfo, vif->dev); + vif->dev = NULL; + } + vif_remove(vnet, vmac); + exit: + if(vnetinfo) Vnet_decref(vnetinfo); + if(vif) vif_decref(vif); + dprintf("< err=%d\n", err); + return err; +} + +/** (varp.print) + */ +static int eval_varp_print(Sxpr exp){ + int err = 0; + varp_print(); + return err; +} + +/** (varp.mcaddr (addr )) + */ +static int eval_varp_mcaddr(Sxpr exp){ + int err =0; + Sxpr oaddr = intern("addr"); + uint32_t addr; + + err = child_addr(exp, oaddr, &addr); + if(err < 0) goto exit; + varp_set_mcast_addr(addr); + exit: + return err; +} + +/** (vnet.add (id ) [(security { none | auth | conf } )] ) + */ +static int eval_vnet_add(Sxpr exp){ + int err = 0; + Sxpr oid = intern("id"); + Sxpr osecurity = intern("security"); + Sxpr csecurity; + int id; + char *security; + int sec; + err = child_int(exp, oid, &id); + if(err) goto exit; + if(id 
< VNET_VIF){ + err = -EINVAL; + goto exit; + } + csecurity = sxpr_child_value(exp, osecurity, intern("none")); + err = stringof(csecurity, &security); + if(err) goto exit; + if(strcmp(security, "none")==0){ + sec = 0; + } else if(strcmp(security, "auth")==0){ + sec = SA_AUTH; + } else if(strcmp(security, "conf")==0){ + sec = SA_CONF; + } else { + err = -EINVAL; + goto exit; + } + dprintf("> vnet id=%d\n", id); + err = ctrl_vnet_add(id, sec); + exit: + dprintf("< err=%d\n", err); + return err; +} + +/** Delete a vnet. + * + * (vnet.del (id )) + * + * @param vnet vnet id + * @return 0 on success, error code otherwise + */ +static int eval_vnet_del(Sxpr exp){ + int err = 0; + Sxpr oid = intern("id"); + int id; + + err = child_int(exp, oid, &id); + if(err) goto exit; + err = ctrl_vnet_del(id); + exit: + return err; +} + +/** (vif.add (vnet ) (vmac )) + */ +static int eval_vif_add(Sxpr exp){ + int err = 0; + Sxpr ovnet = intern("vnet"); + Sxpr ovmac = intern("vmac"); + int vnet; + Vmac vmac = {}; + + err = child_int(exp, ovnet, &vnet); + if(err) goto exit; + err = child_mac(exp, ovmac, vmac.mac); + if(err) goto exit; + err = ctrl_vif_add(vnet, &vmac); + exit: + return err; +} + +/** (vif.conn (vif ) (vnet ) (vmac )) + */ +static int eval_vif_conn(Sxpr exp){ + int err = 0; + Sxpr ovif = intern("vif"); + Sxpr ovnet = intern("vnet"); + Sxpr ovmac = intern("vmac"); + char *vif = NULL; + int vnet = 0; + Vmac vmac = {}; + + err = child_string(exp, ovif, &vif); + if(err) goto exit; + err = child_int(exp, ovnet, &vnet); + if(err) goto exit; + err = child_mac(exp, ovmac, vmac.mac); + dprintf("> connect vif=%s vnet=%d\n", vif, vnet); + err = ctrl_vif_conn(vif, vnet, &vmac); + exit: + dprintf("< err=%d\n", err); + return err; +} + +/** (vif.del (vnet ) (vmac )) + */ +static int eval_vif_del(Sxpr exp){ + int err = 0; + Sxpr ovnet = intern("vnet"); + Sxpr ovmac = intern("vmac"); + int vnet; + Vmac vmac = {}; + + err = child_int(exp, ovnet, &vnet); + if(err) goto exit; + err = 
child_mac(exp, ovmac, vmac.mac); + if(err) goto exit; + err = ctrl_vif_del(vnet, &vmac); + exit: + return err; +} + +typedef struct SxprEval { + Sxpr elt; + int (*fn)(Sxpr); +} SxprEval; + +static int eval(Sxpr exp){ + int err = 0; + SxprEval defs[] = { + { intern("varp.print"), eval_varp_print }, + { intern("varp.mcaddr"), eval_varp_mcaddr }, + { intern("vif.add"), eval_vif_add }, + { intern("vif.conn"), eval_vif_conn }, + { intern("vif.del"), eval_vif_del }, + { intern("vnet.add"), eval_vnet_add }, + { intern("vnet.del"), eval_vnet_del }, + { ONONE, NULL } }; + SxprEval *def; + + dprintf(">\n"); + err = -EINVAL; + for(def = defs; !NONEP(def->elt); def++){ + if(sxpr_elementp(exp, def->elt)){ + err = def->fn(exp); + break; + } + } + dprintf("< err=%d\n", err); + return err; +} + +void __init ProcFS_init(void){ + ProcEntry *root_entry; + ProcEntry *policy_entry; + + dprintf(">\n"); + root_entry = ProcFS_mkdir(MODULE_ROOT, NULL); + if(!root_entry) goto exit; + policy_entry = ProcFS_register("policy", root_entry, VNET_POLICY); + exit: + dprintf("<\n"); +} + +void __exit ProcFS_exit(void){ + dprintf(">\n"); + ProcFS_rmrec(MODULE_ROOT, NULL); + dprintf("<\n"); +} diff --git a/tools/vnet/vnet-module/vnet_ioctl.h b/tools/vnet/vnet-module/vnet_ioctl.h new file mode 100644 index 0000000000..e57763284b --- /dev/null +++ b/tools/vnet/vnet-module/vnet_ioctl.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free software Foundation, Inc., + * 59 Temple Place, suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _VNET_VNET_IOCTL_H_ +#define _VNET_VNET_IOCTL_H_ + +extern void ProcFS_init(void); +extern void ProcFS_exit(void); + +#endif /* ! _VNET_VNET_IOCTL_H_ */ diff --git a/tools/vnet/vnetd/Makefile b/tools/vnet/vnetd/Makefile new file mode 100644 index 0000000000..69d4c0269c --- /dev/null +++ b/tools/vnet/vnetd/Makefile @@ -0,0 +1,103 @@ +# -*- mode: Makefile; -*- +#---------------------------------------------------------------------------- +# Copyright (C) 2004 Mike Wray . +# +# This library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of the +# License, or (at your option) any later version. This library is +# distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this library; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +#---------------------------------------------------------------------------- + +all: vnetd + +#---------------------------------------------------------------------------- + +XEN_ROOT = ../../.. +include $(XEN_ROOT)/tools/Make.defs + +VNETD_INSTALL_DIR = /usr/sbin + +LIB_DIR = $(XEN_LIBXUTIL) +VNET_DIR = ../vnet-module + +INCLUDES += -I$(LIB_DIR) +INCLUDES += -I$(VNET_DIR) + +#---------------------------------------------------------------------------- +# GC. 
GC_DIR:=../gc/install
GC_INCLUDE:= $(GC_DIR)/include
GC_LIB_DIR:=$(GC_DIR)/lib

INCLUDES += -I$(GC_INCLUDE)
#LIBS += -L$(GC_LIB_DIR)
CPPFLAGS += -D USE_GC

#----------------------------------------------------------------------------
CFLAGS += -g
CFLAGS += -Wall
CFLAGS += $(INCLUDES) $(LIBS)

LDFLAGS += $(LIBS)

# Dependencies. Gcc generates them for us.
CFLAGS += -Wp,-MD,.$(@F).d
PROG_DEP = .*.d

vpath %.c $(LIB_DIR)

# Strip the leading -I from each include flag to get plain directories.
# The pattern form is required: $(INCLUDES:-I=) only rewrites a trailing
# "-I" and would leave every word unchanged.
IPATHS:=$(INCLUDES:-I%=%)
vpath %.h $(IPATHS)

#----------------------------------------------------------------------------
VNETD_SRC:=
VNETD_SRC+= connection.c
VNETD_SRC+= marshal.c
VNETD_SRC+= select.c
VNETD_SRC+= timer.c
VNETD_SRC+= vcache.c
VNETD_SRC+= vnetd.c

LIB_SRC:=
LIB_SRC+= allocate.c
LIB_SRC+= enum.c
LIB_SRC+= file_stream.c
LIB_SRC+= hash_table.c
LIB_SRC+= iostream.c
LIB_SRC+= lexis.c
LIB_SRC+= socket_stream.c
LIB_SRC+= string_stream.c
LIB_SRC+= sxpr.c
LIB_SRC+= sys_net.c
LIB_SRC+= sys_string.c
LIB_SRC+= util.c

VNETD_SRC+=$(LIB_SRC)

VNETD_OBJ := $(VNETD_SRC:.c=.o)

#VNETD_LIBS:= $(GC_LIB_DIR)/libgc.so.1.0.2
#VNETD_LIBS:= -lgc
VNETD_LIBS:= $(GC_LIB_DIR)/libgc.a

# These targets name actions, not files.
.PHONY: all install clean

vnetd: $(VNETD_OBJ)
	$(CC) $(CFLAGS) -o $@ $^ $(VNETD_LIBS) -ldl -lpthread

install: vnetd
	mkdir -p $(prefix)/$(VNETD_INSTALL_DIR)
	install -m 0755 vnetd $(prefix)/$(VNETD_INSTALL_DIR)

clean:
	-rm -f *.a *.o *~
	-rm -f vnetd
	-rm -f $(PROG_DEP)

-include $(PROG_DEP)
diff --git a/tools/vnet/vnetd/connection.c b/tools/vnet/vnetd/connection.c
new file mode 100644
index 0000000000..6571f70762
--- /dev/null
+++ b/tools/vnet/vnetd/connection.c
@@ -0,0 +1,167 @@
/*
 * Copyright (C) 2003 - 2004 Mike Wray .
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of the
 * License, or (at your option) any later version.
This library is
 * distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/* NOTE(review): the names of the six system headers below were lost in
 * extraction - restore them from upstream. */
#include
#include
#include
#include
#include
#include
#include <unistd.h>

#include "allocate.h"
#include "connection.h"
#include "file_stream.h"
#include "socket_stream.h"

#define DEBUG
#undef DEBUG
#define MODULE_NAME "conn"
#include "debug.h"

/** Initialize a file stream from a file desciptor.
 *
 * @param fd file descriptor
 * @param mode file mode
 * @param buffered make the stream buffered if 1, unbuffered if 0
 * @param io return parameter for the stream
 * @return 0 on success, error code otherwise
 */
static int stream_init(int fd, const char *mode, int buffered, IOStream **io){
    int err = 0;
    *io = file_stream_fdopen(fd, mode);
    if(!*io){
        err = -errno;
        perror("fdopen");
        goto exit;
    }
    if(!buffered){
        // Make unbuffered.
+ err = file_stream_setvbuf(*io, NULL, _IONBF, 0); + if(err){ + err = -errno; + perror("setvbuf"); + goto exit; + } + } + exit: + if(err && *io){ + IOStream_close(*io); + *io = NULL; + } + return err; +} + +ConnList * ConnList_add(Conn *conn, ConnList *l){ + ConnList *v; + v = ALLOCATE(ConnList); + v->conn = conn; + v->next =l; + return v; +} + +Conn *Conn_new(int (*fn)(Conn *), void *data){ + Conn *conn; + conn = ALLOCATE(Conn); + conn->fn = fn; + conn->data = data; + return conn; +} + +int Conn_handle(Conn *conn){ + int err = 0; + dprintf(">\n"); + if(conn->fn){ + err = conn->fn(conn); + } else { + dprintf("> no handler\n"); + err = -ENOSYS; + } + if(err < 0){ + Conn_close(conn); + } + dprintf("< err=%d\n", err); + return err; +} + +/** Initialize a connection. + * + * @param conn connection + * @param sock socket + * @param ipaddr ip address + * @return 0 on success, error code otherwise + */ +int Conn_init(Conn *conn, int sock, int type, struct sockaddr_in addr){ + int err = 0; + conn->addr = addr; + conn->type = type; + conn->sock = sock; + if(type == SOCK_STREAM){ + err = stream_init(sock, "r", 0, &conn->in); + if(err) goto exit; + err = stream_init(sock, "w", 0, &conn->out); + if(err) goto exit; + } else { + conn->in = socket_stream_new(sock); + conn->out = socket_stream_new(sock); + socket_stream_set_addr(conn->out, &addr); + } + exit: + if(err) eprintf("< err=%d\n", err); + return err; +} + +/** Open a connection. 
+ * + * @param conn connection + * @param socktype socket type + * @param ipaddr ip address to connect to + * @param port port + * @return 0 on success, error code otherwise + */ +int Conn_connect(Conn *conn, int socktype, struct in_addr ipaddr, uint16_t port){ + int err = 0; + int sock; + struct sockaddr_in addr_in; + struct sockaddr *addr = (struct sockaddr *)&addr_in; + socklen_t addr_n = sizeof(addr_in); + dprintf("> addr=%s:%d\n", inet_ntoa(ipaddr), ntohs(port)); + sock = socket(AF_INET, socktype, 0); + if(sock < 0){ + err = -errno; + goto exit; + } + addr_in.sin_family = AF_INET; + addr_in.sin_addr = ipaddr; + addr_in.sin_port = port; + err = connect(sock, addr, addr_n); + if(err) goto exit; + err = Conn_init(conn, sock, socktype, addr_in); + exit: + if(err) eprintf("< err=%d\n", err); + return err; +} + +/** Close a connection. + * + * @param conn connection + */ +void Conn_close(Conn *conn){ + if(!conn) return; + if(conn->in) IOStream_close(conn->in); + if(conn->out) IOStream_close(conn->out); + shutdown(conn->sock, 2); +} diff --git a/tools/vnet/vnetd/connection.h b/tools/vnet/vnetd/connection.h new file mode 100644 index 0000000000..198f28ed90 --- /dev/null +++ b/tools/vnet/vnetd/connection.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2003 - 2004 Mike Wray . + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of the + * License, or (at your option) any later version. This library is + * distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU Lesser General Public License for more details. 
*
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#ifndef _VNET_CONNECTION_H_
#define _VNET_CONNECTION_H_

/* NOTE(review): the header name was lost in extraction; presumably the
 * one declaring struct sockaddr_in - confirm against upstream. */
#include

#include "iostream.h"

/** A connection.
 * The underlying transport is a socket.
 * Contains in and out streams using the socket.
 */
typedef struct Conn {
    /** Socket address stored by Conn_init(). */
    struct sockaddr_in addr;
    /** The socket. */
    int sock;
    /** Socket type (compared against SOCK_STREAM by Conn_init()). */
    int type;
    /** Stream for reading from the socket. */
    IOStream *in;
    /** Stream for writing to the socket. */
    IOStream *out;
    /** Handler run by Conn_handle(); may be NULL. */
    int (*fn)(struct Conn *);
    /** User data for the handler. */
    void *data;
} Conn;

/** A singly-linked list of connections. */
typedef struct ConnList {
    /** Connection held by this node. */
    Conn *conn;
    /** Next node, NULL at the end of the list. */
    struct ConnList *next;
} ConnList;

/* Push conn onto the front of l and return the new head. */
extern ConnList * ConnList_add(Conn *conn, ConnList *l);

/* Allocate a connection with handler fn and user data. */
extern Conn * Conn_new(int (*fn)(struct Conn *), void *data);
/* Set up conn on an existing socket. */
extern int Conn_init(Conn *conn, int sock, int type, struct sockaddr_in addr);
/* Create a socket, connect it to ipaddr:port and set up conn on it. */
extern int Conn_connect(Conn *conn, int type, struct in_addr ipaddr, uint16_t port);
/* Run the connection's handler; closes the connection on error. */
extern int Conn_handle(Conn *conn);
/* Close the streams and shut the socket down. */
extern void Conn_close(Conn *conn);

#endif /* ! _VNET_CONNECTION_H_ */
diff --git a/tools/vnet/vnetd/marshal.c b/tools/vnet/vnetd/marshal.c
new file mode 100644
index 0000000000..694b6eaa05
--- /dev/null
+++ b/tools/vnet/vnetd/marshal.c
@@ -0,0 +1,223 @@
/*
 * Copyright (C) 2001 - 2004 Mike Wray .
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of the
 * License, or (at your option) any later version. This library is
 * distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include "sys_net.h" +#include "allocate.h" +#include "marshal.h" + +#define MODULE_NAME "marshal" +#define DEBUG +#undef DEBUG +#include "debug.h" + +#define ARRAY_SIZE(ary) (sizeof(ary)/sizeof((ary)[0])) + +/* Messages are coded as msgid followed by message fields. + * Initial message on any channel is hello - so can check version + * compatibility. + * + * char* -> uint16_t:n + * ints/uints go as suitable number of bytes (e.g. uint16_t is 2 bytes). + * optional fields go as '1' or '0' (the 0/1 is 1 byte). + * lists go as ('1' )* '0' + */ + +int marshal_flush(IOStream *io){ + int err = 0; + err = IOStream_flush(io); + return err; +} + +int marshal_bytes(IOStream *io, void *s, uint32_t s_n){ + int err = 0; + int n; + n = IOStream_write(io, s, s_n); + if(n < 0){ + err = n; + } else if (n < s_n){ + dprintf("> Wanted %d, got %d\n", s_n, n); + err = -EIO; + } + return err; +} + +int unmarshal_bytes(IOStream *io, void *s, uint32_t s_n){ + int err = 0; + int n; + //dprintf("> s_n=%d\n", s_n); + n = IOStream_read(io, s, s_n); + //dprintf("> n=%d\n", n); + if(n < 0){ + err = n; + } else if(n < s_n){ + dprintf("> Wanted %d, got %d\n", s_n, n); + err = -EIO; + } + //dprintf("< err=%d\n", err); + return err; +} + +int marshal_uint8(IOStream *io, uint8_t x){ + return marshal_bytes(io, &x, sizeof(x)); +} + +int unmarshal_uint8(IOStream *io, uint8_t *x){ + return unmarshal_bytes(io, x, sizeof(*x)); +} + +int marshal_uint16(IOStream *io, uint16_t x){ + x = htons(x); + return marshal_bytes(io, &x, sizeof(x)); +} + +int unmarshal_uint16(IOStream *io, uint16_t *x){ + int err = 0; + err = unmarshal_bytes(io, x, sizeof(*x)); + *x = ntohs(*x); + return err; +} + +int marshal_int32(IOStream *io, int32_t x){ + int err = 0; + //dprintf("> x=%d\n", x); + 
x = htonl(x); + err = marshal_bytes(io, &x, sizeof(x)); + //dprintf("< err=%d\n", err); + return err; +} + +int unmarshal_int32(IOStream *io, int32_t *x){ + int err = 0; + //dprintf(">\n"); + err = unmarshal_bytes(io, x, sizeof(*x)); + *x = ntohl(*x); + //dprintf("< err=%d x=%d\n", err, *x); + return err; +} + +int marshal_uint32(IOStream *io, uint32_t x){ + int err = 0; + //dprintf("> x=%u\n", x); + x = htonl(x); + err = marshal_bytes(io, &x, sizeof(x)); + //dprintf("< err=%d\n", err); + return err; +} + +int unmarshal_uint32(IOStream *io, uint32_t *x){ + int err = 0; + //dprintf(">\n"); + err = unmarshal_bytes(io, x, sizeof(*x)); + *x = ntohl(*x); + //dprintf("< err=%d x=%u\n", err, *x); + return err; +} + +int marshal_uint64(IOStream *io, uint64_t x){ + int err; + err = marshal_uint32(io, (uint32_t) ((x >> 32) & 0xffffffff)); + if(err) goto exit; + err = marshal_uint32(io, (uint32_t) ( x & 0xffffffff)); + exit: + return err; +} + +int unmarshal_uint64(IOStream *io, uint64_t *x){ + int err = 0; + uint32_t hi, lo; + err = unmarshal_uint32(io, &hi); + if(err) goto exit; + err = unmarshal_uint32(io, &lo); + *x = (((uint64_t) hi) << 32) | lo; + exit: + return err; +} + +int marshal_net16(IOStream *io, net16_t x){ + return marshal_bytes(io, &x, sizeof(x)); +} + +int unmarshal_net16(IOStream *io, net16_t *x){ + int err = 0; + err = unmarshal_bytes(io, x, sizeof(*x)); + return err; +} + +int marshal_net32(IOStream *io, net32_t x){ + return marshal_bytes(io, &x, sizeof(x)); +} + +int unmarshal_net32(IOStream *io, net32_t *x){ + int err = 0; + err = unmarshal_bytes(io, x, sizeof(*x)); + return err; +} + +int marshal_string(IOStream *io, char *s, uint32_t s_n){ + int err; + //dprintf("> s=%s\n", s); + err = marshal_uint32(io, s_n); + if(err) goto exit; + err = marshal_bytes(io, s, s_n); + exit: + //dprintf("< err=%d\n", err); + return err; +} + +int unmarshal_string(IOStream *io, char *s, uint32_t s_n){ + int err = 0, val_n = 0; + //dprintf(">\n"); + err = 
unmarshal_uint32(io, &val_n); + if(err) goto exit; + if(val_n >= s_n){ + err = -EINVAL; + goto exit; + } + err = unmarshal_bytes(io, s, val_n); + if(err) goto exit; + s[val_n] = '\0'; + exit: + //dprintf("< err=%d s=%s\n", err, s); + return err; +} + +int unmarshal_new_string(IOStream *io, char **s, uint32_t *s_n){ + int err = 0, val_n = 0; + char *val = NULL; + //dprintf(">\n"); + err = unmarshal_uint32(io, &val_n); + if(err) goto exit; + val = allocate(val_n + 1); + if(!val){ + err = -ENOMEM; + goto exit; + } + err = unmarshal_bytes(io, val, val_n); + if(err) goto exit; + val[val_n] = '\0'; + exit: + if(err){ + if(val) deallocate(val); + val = NULL; + val_n = 0; + } + *s = val; + if(s_n) *s_n = val_n; + //dprintf("< err=%d s=%s\n", err, *s); + return err; +} diff --git a/tools/vnet/vnetd/marshal.h b/tools/vnet/vnetd/marshal.h new file mode 100644 index 0000000000..d9cdfb0677 --- /dev/null +++ b/tools/vnet/vnetd/marshal.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2004 Mike Wray . + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of the + * License, or (at your option) any later version. This library is + * distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _XEN_LIB_MARSHAL_H_ +#define _XEN_LIB_MARSHAL_H_ + +#include "iostream.h" + +/** A 16-bit uint in network order, e.g. a port number. */ +typedef uint16_t net16_t; + +/** A 32-bit uint in network order, e.g. an IP address. 
*/ +typedef uint32_t net32_t; + +extern int marshal_flush(IOStream *io); + +extern int marshal_bytes(IOStream *io, void *s, uint32_t s_n); +extern int unmarshal_bytes(IOStream *io, void *s, uint32_t s_n); + +extern int marshal_uint8(IOStream *io, uint8_t x); +extern int unmarshal_uint8(IOStream *io, uint8_t *x); + +extern int marshal_uint16(IOStream *io, uint16_t x); +extern int unmarshal_uint16(IOStream *io, uint16_t *x); + +extern int marshal_uint32(IOStream *io, uint32_t x); +extern int unmarshal_uint32(IOStream *io, uint32_t *x); + +extern int marshal_int32(IOStream *io, int32_t x); +extern int unmarshal_int32(IOStream *io, int32_t *x); + +extern int marshal_uint64(IOStream *io, uint64_t x); +extern int unmarshal_uint64(IOStream *io, uint64_t *x); + +extern int marshal_net16(IOStream *io, net16_t x); +extern int unmarshal_net16(IOStream *io, net16_t *x); + +extern int marshal_net32(IOStream *io, net32_t x); +extern int unmarshal_net32(IOStream *io, net32_t *x); + +extern int marshal_string(IOStream *io, char *s, uint32_t s_n); +extern int unmarshal_string(IOStream *io, char *s, uint32_t s_n); +extern int unmarshal_new_string(IOStream *io, char **s, uint32_t *s_n); + +#endif /* ! _XEN_LIB_MARSHAL_H_ */ diff --git a/tools/vnet/vnetd/select.c b/tools/vnet/vnetd/select.c new file mode 100644 index 0000000000..483f615822 --- /dev/null +++ b/tools/vnet/vnetd/select.c @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2003 - 2004 Mike Wray . + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of the + * License, or (at your option) any later version. This library is + * distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include + +#include "select.h" + +/** Zero all the file descriptor sets. + * + * @param set select set + * @param fd file descriptor + * @return 0 on success, -1 otherwise + */ +void SelectSet_zero(SelectSet *set){ + set->n = 0; + FD_ZERO(&set->rd); + FD_ZERO(&set->wr); + FD_ZERO(&set->er); +} + +/** Add a file descriptor to the write set. + * + * @param set select set + * @param fd file descriptor + * @return 0 on success, -1 otherwise + */ +void SelectSet_add_read(SelectSet *set, int fd){ + FD_SET(fd, &set->rd); + if(fd > set->n) set->n = fd; +} + +/** Add a file descriptor to the write set. + * + * @param set select set + * @param fd file descriptor + * @return 0 on success, -1 otherwise + */ +void SelectSet_add_write(SelectSet *set, int fd){ + FD_SET(fd, &set->wr); + if(fd > set->n) set->n = fd; +} + +/** Select on file descriptors. + * + * @param set select set + * @param timeout timeout (may be NULL for no timeout) + * @return 0 on success, -1 otherwise + */ +int SelectSet_select(SelectSet *set, struct timeval *timeout){ + return select(set->n+1, &set->rd, &set->wr, &set->er, timeout); +} diff --git a/tools/vnet/vnetd/select.h b/tools/vnet/vnetd/select.h new file mode 100644 index 0000000000..6547915567 --- /dev/null +++ b/tools/vnet/vnetd/select.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2003 - 2004 Mike Wray . + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of the + * License, or (at your option) any later version. 
This library is
 * distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#ifndef _VFC_SELECT_H_
#define _VFC_SELECT_H_

/* NOTE(review): this header includes nothing itself, so fd_set and
 * struct timeval must already be declared by whoever includes it. */

/** Set of file descriptors for select.
 */
typedef struct SelectSet {
    /** Highest descriptor added so far; select() is called with n+1. */
    int n;
    /** Read, write and exception descriptor sets. */
    fd_set rd, wr, er;
} SelectSet;

/* Reset n and clear all three descriptor sets. */
extern void SelectSet_zero(SelectSet *set);
/* Add fd to the read set. */
extern void SelectSet_add_read(SelectSet *set, int fd);
/* Add fd to the write set. */
extern void SelectSet_add_write(SelectSet *set, int fd);
/* Call select() on the sets; returns select()'s result. */
extern int SelectSet_select(SelectSet *set, struct timeval *timeout);

#endif /* ! _VFC_SELECT_H_ */
diff --git a/tools/vnet/vnetd/timer.c b/tools/vnet/vnetd/timer.c
new file mode 100644
index 0000000000..01147346ff
--- /dev/null
+++ b/tools/vnet/vnetd/timer.c
@@ -0,0 +1,154 @@
/*
 * Copyright (C) 2004 Mike Wray
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include + +#include "allocate.h" +#include "timer.h" + +#define MODULE_NAME "TIMER" +#undef DEBUG +#define DEBUG 1 +#include "debug.h" + +static Timer *timers = NULL; + +/** Get the time now as a double (in seconds). + * Returns zero if could not get the time. + * + * @return time now + */ +double time_now(void){ + struct timeval time; + if(gettimeofday(&time, NULL)) return 0.0; + return (double)time.tv_sec + (1.0e-6 * (double)time.tv_usec); +} + +/** Set the process real-time timer to go off at a given expiry time. + * The timer will not be set to go off in less than 10 ms + * (even if the expiry time is sooner, or in the past). + * + * @param expiry time (in seconds) + * @return 0 on success, error code otherwise + */ +static int timer_set(double expiry){ + struct itimerval val = {}; + struct itimerval old = {}; + double now, delay; + int err = 0; + + if(expiry == 0.0){ + val.it_value.tv_sec = 0; + val.it_value.tv_usec = 0; + } else { + now = time_now(); + delay = expiry - now; + if(delay < 0.01) delay = 0.01; + val.it_value.tv_sec = (long)delay; + val.it_value.tv_usec = (long)((delay - (double)(long)delay) * 1.0e6); + } + err = setitimer(ITIMER_REAL, &val, &old); + return err; +} + +static void Timer_free(Timer *z){ +#ifndef USE_GC + if(!z) return; + deallocate(z); +#endif +} + +/** Process any expired timers. + * Calls the functions of expired timers and removes them + * from the timer list. + * Reschedules the interval timer for the earliest expiring timer + * (if any). + * + * Should not be called from within the SIGALRM handler - set + * a flag there and call it later. + * + * @return 0 on success, error code otherwise. 
+ */ +int process_timers(void){ + double now = time_now(); + Timer *curr, *next; + for(curr = timers; curr; curr = next){ + next = curr->next; + if(curr->expiry > now) break; + if(curr->fn) curr->fn(curr); + Timer_free(curr); + } + timers = curr; + timer_set((curr ? curr->expiry : 0)); + return 0; +} + +Timer * Timer_set(double delay, TimerFn *fn, void *data){ + // Get 'now'. + double now = time_now(); + Timer *timer = NULL, *prev, *curr, *next; + timer = ALLOCATE(Timer); + if(!timer) goto exit; + // Add delay to now to get expiry time. + timer->expiry = now + delay; + timer->fn = fn; + timer->data = data; + + // Insert timer in list ordered by (increasing) expiry time. + prev = NULL; + for(curr = timers; curr; prev = curr, curr = next){ + next = curr->next; + if(timer->expiry < curr->expiry) break; + } + if(prev){ + prev->next = timer; + } else { + timers = timer; + } + timer->next = curr; + + // Set interval timer to go off for earliest expiry time. + timer_set(timer->expiry); + exit: + return timer; +} + +int Timer_cancel(Timer *timer){ + // Remove timer from list. + int err = -ENOENT; + Timer *prev, *curr, *next; + for(prev = NULL, curr = timers; curr; prev = curr, curr = next){ + next = curr->next; + if(curr == timer){ + err = 0; + if(prev){ + prev->next = curr->next; + } else { + timers = curr->next; + } + curr->next = NULL; + Timer_free(curr); + break; + } + } + return err; +} + diff --git a/tools/vnet/vnetd/timer.h b/tools/vnet/vnetd/timer.h new file mode 100644 index 0000000000..8342efd8e5 --- /dev/null +++ b/tools/vnet/vnetd/timer.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2004 Mike Wray + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. 
*
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#ifndef _VNET_TIMER_H_
#define _VNET_TIMER_H_

struct Timer;

/** Type of the function called when a timer expires. */
typedef void TimerFn(struct Timer *);

/** A one-shot timer, kept in a list ordered by expiry time. */
typedef struct Timer {
    /** Function to call on expiry (may be NULL). */
    TimerFn *fn;
    /** User data for the function. */
    void *data;
    /** Absolute expiry time in seconds, on the time_now() clock. */
    double expiry;
    /** Next timer in the list. */
    struct Timer *next;
} Timer;

/* NOTE(review): timer_alarm() is declared here but not defined in
 * timer.c - confirm where (or whether) it is implemented. */
extern void timer_alarm(void);
/* Current time in seconds, 0.0 if it can't be read. */
extern double time_now(void);
/* Run and remove expired timers, then re-arm the interval timer. */
extern int process_timers(void);
/* Schedule fn to be called after delay seconds. */
extern Timer * Timer_set(double delay, TimerFn *fn, void *data);
/* Remove a pending timer; -ENOENT if it is not in the list. */
extern int Timer_cancel(Timer *timer);

#endif /* ! _VNET_TIMER_H_ */
diff --git a/tools/vnet/vnetd/vcache.c b/tools/vnet/vnetd/vcache.c
new file mode 100644
index 0000000000..cd06988236
--- /dev/null
+++ b/tools/vnet/vnetd/vcache.c
@@ -0,0 +1,639 @@
/*
 * Copyright (C) 2004 Mike Wray .
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of the
 * License, or (at your option) any later version. This library is
 * distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "allocate.h" +#include "hash_table.h" +#include "sys_net.h" +#include "sys_string.h" +#include "connection.h" +#include "marshal.h" +#include "timer.h" + +#undef offsetof +#include "vnetd.h" +#include "vcache.h" + +#define MODULE_NAME "VARP" +#define DEBUG 1 +#undef DEBUG +#include "debug.h" + +static VarpCache *vcache = NULL; + +void IPMessageQueue_init(IPMessageQueue *queue, int maxlen){ + queue->msg = NULL; + queue->len = 0; + queue->maxlen = maxlen; +} + +void IPMessageQueue_clear(IPMessageQueue *queue){ + queue->msg = NULL; + queue->len = 0; +} + +void IPMessageQueue_truncate(IPMessageQueue *queue, int n){ + IPMessage **p = &queue->msg; + int i; + for(i = 1; *p; p = &(*p)->next, i++){ + if(i == n){ + *p = NULL; + break; + } + } +} + +void IPMessageQueue_add(IPMessageQueue *queue, IPMessage *msg){ + msg->next = queue->msg; + queue->msg = msg; + queue->len++; + if(queue->len >= queue->maxlen){ + IPMessageQueue_truncate(queue, queue->maxlen); + } +} + +IPMessage * IPMessageQueue_pop(IPMessageQueue *queue){ + IPMessage *msg = NULL; + if(queue->len > 0){ + queue->len--; + msg = queue->msg; + queue->msg = msg->next; + msg->next = NULL; + } + return msg; +} + +void VarpCache_sweep(VarpCache *z, int all); + +/** Send a varp protocol message. 
+ * + * @param opcode varp opcode (host order) + * @param vnet vnet id (in network order) + * @param vmac vmac (in network order) + * @return 0 on success, error code otherwise + */ +int varp_send(Conn *conn, uint16_t opcode, uint32_t vnet, Vmac *vmac, uint32_t addr){ + int err = 0; + int varp_n = sizeof(VarpHdr); + VarpHdr varph = {}; + + varph.id = htons(VARP_ID); + varph.opcode = htons(opcode); + varph.vnet = vnet; + varph.vmac = *vmac; + varph.addr = addr; + + if(0){ + struct sockaddr_in self; + socklen_t self_n; + getsockname(conn->sock, (struct sockaddr *)&self, &self_n); + dprintf("> sockname addr=%s port=%d\n", + inet_ntoa(self.sin_addr), ntohs(self.sin_port)); + } + dprintf("> addr=%s opcode=%d\n", + inet_ntoa(conn->addr.sin_addr), opcode); + dprintf("> vnet=%d vmac=" MACFMT " addr=" IPFMT "\n", + ntohl(vnet), MAC6TUPLE(vmac->mac), NIPQUAD(addr)); + err = marshal_bytes(conn->out, &varph, varp_n); + marshal_flush(conn->out); + dprintf("< err=%d\n", err); + return err; +} + +/* Test some flags. + * + * @param z varp entry + * @param flags to test + * @return nonzero if flags set + */ +int VCEntry_get_flags(VCEntry *z, int flags){ + return z->flags & flags; +} + +/** Set some flags. + * + * @param z varp entry + * @param flags to set + * @param set set flags on if nonzero, off if zero + * @return new flags value + */ +int VCEntry_set_flags(VCEntry *z, int flags, int set){ + if(set){ + z->flags |= flags; + } else { + z->flags &= ~flags; + } + return z->flags; +} + +/** Print a varp entry. + * + * @param ventry varp entry + */ +void VCEntry_print(VCEntry *ventry){ + if(ventry){ + char *c, *d; + switch(ventry->state){ + case VCACHE_STATE_INCOMPLETE: c = "INC"; break; + case VCACHE_STATE_REACHABLE: c = "RCH"; break; + case VCACHE_STATE_FAILED: c = "FLD"; break; + default: c = "UNK"; break; + } + d = (VCEntry_get_flags(ventry, VCACHE_FLAG_PROBING) ? 
"P" : " "); + + printf("VENTRY(%p %s %s vnet=%d vmac=" MACFMT " addr=" IPFMT " time=%g)\n", + ventry, + c, d, + ntohl(ventry->key.vnet), + MAC6TUPLE(ventry->key.vmac.mac), + NIPQUAD(ventry->addr), + ventry->timestamp); + } else { + printf("VENTRY: Null!\n"); + } +} + +int VCEntry_schedule(VCEntry *ventry); +void VCEntry_solicit(VCEntry *ventry); + +/** Function called when a varp entry timer goes off. + * If the entry is still incomplete, carries on probing. + * Otherwise stops probing. + * + * @param arg ventry + */ +static void ventry_timer_fn(Timer *timer){ + VCEntry *ventry = timer->data; + int probing = 0, scheduled = 0; + + //dprintf(">\n"); VCEntry_print(ventry); + if(ventry->state == VCACHE_STATE_REACHABLE){ + // Do nothing. + } else { + // Probe if haven't run out of tries, otherwise fail. + if(ventry->probes < VCACHE_PROBE_MAX){ + //probing = 1; + ventry->probes++; + scheduled = VCEntry_schedule(ventry); + //VCEntry_solicit(ventry); + probing = scheduled; + } else { + ventry->state = VCACHE_STATE_FAILED; + IPMessageQueue_clear(&ventry->queue); + } + } + if(!probing){ + VCEntry_set_flags(ventry, + (VCACHE_FLAG_PROBING + | VCACHE_FLAG_REMOTE_PROBE + | VCACHE_FLAG_LOCAL_PROBE), + 0); + } + VCEntry_set_flags(ventry, VCACHE_FLAG_PROBING, probing); + //dprintf("<\n"); +} + +/** Schedule the varp entry timer. + * + * @param ventry varp entry + */ +int VCEntry_schedule(VCEntry *ventry){ + int scheduled = 0; + if(ventry->probes == 1){ + scheduled = 1; + Timer_set(VCACHE_LOCAL_DELAY, ventry_timer_fn, ventry); + } else { + VCEntry_solicit(ventry); + } + return scheduled; +} + +/** Create a varp entry. Initializes the internal state. 
+ * + * @param vnet vnet id + * @param vmac virtual MAC address (copied) + * @return ventry or null + */ +VCEntry * VCEntry_new(uint32_t vnet, Vmac *vmac){ + VCEntry *z = ALLOCATE(VCEntry); + z->state = VCACHE_STATE_INCOMPLETE; + z->timestamp = time_now(); + z->key.vnet = vnet; + z->key.vmac = *vmac; + return z; +} + +/** Hash function for keys in the varp cache. + * Hashes the vnet id and mac. + * + * @param k key (VCKey) + * @return hashcode + */ +Hashcode vcache_key_hash_fn(void *k){ + VCKey *key = k; + Hashcode h; + h = hash_2ul(key->vnet, + (key->vmac.mac[0] << 24) | + (key->vmac.mac[1] << 16) | + (key->vmac.mac[2] << 8) | + (key->vmac.mac[3] )); + h = hash_hul(h, + (key->vmac.mac[4] << 8) | + (key->vmac.mac[5] )); + return h; +} + +/** Test equality for keys in the varp cache. + * Compares vnet and mac. + * + * @param k1 key to compare (VCKey) + * @param k2 key to compare (VCKey) + * @return 1 if equal, 0 otherwise + */ +int vcache_key_equal_fn(void *k1, void *k2){ + VCKey *key1 = k1; + VCKey *key2 = k2; + return (key1->vnet == key2->vnet) + && (memcmp(key1->vmac.mac, key2->vmac.mac, ETH_ALEN) == 0); +} + +void VarpCache_schedule(VarpCache *z); + +/** Function called when the varp table timer goes off. + * Sweeps old varp cache entries and reschedules itself. + * + * @param arg varp table + */ +static void vcache_timer_fn(Timer *timer){ + VarpCache *z = timer->data; + //dprintf("> z=%p\n", z); + if(z){ + VarpCache_sweep(z, 0); + VarpCache_schedule(z); + } + //dprintf("<\n"); +} + +/** Schedule the varp table timer. + * + * @param z varp table + */ +void VarpCache_schedule(VarpCache *z){ + Timer_set(VCACHE_ENTRY_TTL, vcache_timer_fn, z); +} + +/** Print a varp table. + * + * @param z table + */ +void VarpCache_print(VarpCache *z){ + HashTable_for_decl(entry); + VCEntry *ventry; + + dprintf(">\n"); + HashTable_for_each(entry, vcache->table){ + ventry = entry->value; + VCEntry_print(ventry); + } + dprintf("<\n"); +} + +/** Print the varp cache. 
+ */ +void vcache_print(void){ + VarpCache_print(vcache); +} + +/** Create a varp table. + * + * @return new table or null + */ +VarpCache * VarpCache_new(void){ + VarpCache *z = NULL; + + z = ALLOCATE(VarpCache); + z->table = HashTable_new(VCACHE_BUCKETS); + z->table->key_equal_fn = vcache_key_equal_fn; + z->table->key_hash_fn = vcache_key_hash_fn; + VarpCache_schedule(z); + return z; +} + +/** Add a new entry to the varp table. + * + * @param z table + * @param vnet vnet id + * @param vmac virtual MAC address (copied) + * @return new entry or null + */ +VCEntry * VarpCache_add(VarpCache *z, uint32_t vnet, Vmac *vmac){ + VCEntry *ventry; + HTEntry *entry; + + ventry = VCEntry_new(vnet, vmac); + //dprintf("> "); VCEntry_print(ventry); + entry = HashTable_add(z->table, ventry, ventry); + return ventry; +} + +/** Remove an entry from the varp table. + * + * @param z table + * @param ventry entry to remove + * @return removed count + */ +int VarpCache_remove(VarpCache *z, VCEntry *ventry){ + return HashTable_remove(z->table, ventry); +} + +/** Lookup an entry in the varp table. 
+ * + * @param z table + * @param vnet vnet id + * @param vmac virtual MAC addres + * @return entry found or null + */ +VCEntry * VarpCache_lookup(VarpCache *z, uint32_t vnet, Vmac *vmac){ + VCKey key = { .vnet = vnet, .vmac = *vmac }; + VCEntry *ventry; + ventry = HashTable_get(z->table, &key); + return ventry; +} + +void VCEntry_solicit(VCEntry *ventry){ + dprintf(">\n"); + if(VCEntry_get_flags(ventry, VCACHE_FLAG_LOCAL_PROBE)){ + dprintf("> local probe\n"); + varp_send(vnetd->bcast_conn, VARP_OP_REQUEST, ventry->key.vnet, &ventry->key.vmac, ventry->addr); + } + if(VCEntry_get_flags(ventry, VCACHE_FLAG_REMOTE_PROBE)){ + ConnList *l; + dprintf("> remote probe\n"); + for(l = vnetd->connections; l; l = l->next){ + varp_send(l->conn, VARP_OP_REQUEST, ventry->key.vnet, &ventry->key.vmac, ventry->addr); + } + + } + dprintf("<\n"); +} + +int VCEntry_resolve(VCEntry *ventry, IPMessage *msg, int flags){ + int err = 0; + + dprintf("> "); //VCEntry_print(ventry); + ventry->state = VCACHE_STATE_INCOMPLETE; + VCEntry_set_flags(ventry, flags, 1); + IPMessageQueue_add(&ventry->queue, msg); + if(!VCEntry_get_flags(ventry, VCACHE_FLAG_PROBING)){ + VCEntry_set_flags(ventry, VCACHE_FLAG_PROBING, 1); + ventry->probes = 1; + VCEntry_schedule(ventry); + //VCEntry_solicit(ventry); + } + dprintf("< err=%d\n", err); + return err; +} + +/** Update a ventry. Sets the address and state to those given + * and sets the timestamp to 'now'. + * + * @param ventry varp entry + * @param addr care-of address + * @param state state + * @return 0 on success, error code otherwise + */ +int VCEntry_update(VCEntry *ventry, IPMessage *msg, VarpHdr *varph, int state){ + int err = 0; + double now = time_now(); + + if(VCEntry_get_flags(ventry, VCACHE_FLAG_PERMANENT)) goto exit; + ventry->addr = varph->addr; + ventry->timestamp = now; + ventry->state = state; + if(ventry->state == VCACHE_STATE_REACHABLE){ + // Process the output queue. 
+ IPMessage *msg; + while((msg = IPMessageQueue_pop(&ventry->queue))){ + dprintf("> announce\n"); + varp_send(msg->conn, VARP_OP_ANNOUNCE, ventry->key.vnet, &ventry->key.vmac, ventry->addr); + } + } + exit: + return err; +} + +/** Update the ventry corresponding to the given varp header. + * + * @param z table + * @param varph varp header + * @param state state + * @return 0 on success, -ENOENT if no entry found + */ +int VarpCache_update(VarpCache *z, IPMessage *msg, VarpHdr *varph, int state){ + int err = 0; + VCEntry *ventry; + + dprintf(">\n"); + ventry = VarpCache_lookup(z, varph->vnet, &varph->vmac); + if(ventry){ + err = VCEntry_update(ventry, msg, varph, state); + } else { + err = -ENOENT; + } + dprintf("< err=%d\n", err); + return err; +} + + +/** Put old varp entries into the incomplete state. + * Permanent entries are not changed. + * If 'all' is non-zero, all non-permanent entries + * are put into the incomplete state, regardless of age. + * + * @param z table + * @param all reset all entries if non-zero + */ +void VarpCache_sweep(VarpCache *z, int all){ + HashTable_for_decl(entry); + VCEntry *ventry; + double now = time_now(); + double old = now - VCACHE_ENTRY_TTL; + + dprintf(">\n"); + HashTable_for_each(entry, vcache->table){ + ventry = entry->value; + if(!VCEntry_get_flags(ventry, VCACHE_FLAG_PERMANENT) && + (all || (ventry->timestamp < old))){ + ventry->state = VCACHE_STATE_INCOMPLETE; + } + } + dprintf("<\n"); +} + +/** Forward a varp message. + * If local forwards it to remote vnetds. + * If not local forwards it to local net. 
+ * + * @param varph varp message to forward + * @param local whether it's local or not + */ +void vcache_forward_varp(VarpHdr *varph, int local){ + uint16_t opcode = ntohs(varph->opcode); + if(local){ + ConnList *l; + for(l = vnetd->connections; l; l = l->next){ + varp_send(l->conn, opcode, varph->vnet, &varph->vmac, varph->addr); + } + } else { + varp_send(vnetd->bcast_conn, opcode, varph->vnet, &varph->vmac, varph->addr); + } +} + +/** Handle a varp request. + * + * @param msg incoming message + * @param varph varp message + * @return 0 if ok, -ENOENT if no matching vif, or error code + */ +#if 1 +int vcache_handle_request(IPMessage *msg, VarpHdr *varph, int local){ + dprintf("> local=%d\n", local); + vcache_forward_varp(varph, local); + dprintf("<\n"); + return 0; +} + +#else +int vcache_handle_request(IPMessage *msg, VarpHdr *varph, int local){ + int err = -ENOENT; + uint32_t vnet; + Vmac *vmac; + VCEntry *ventry = NULL; + int reply = 0; + + dprintf(">\n"); + vnet = htonl(varph->vnet); + vmac = &varph->vmac; + ventry = VarpCache_lookup(vcache, vnet, vmac); + if(!ventry){ + ventry = VarpCache_add(vcache, vnet, vmac); + } + if(local){ + // Request coming from the local subnet (on our udp port). + if(ventry->state == VCACHE_STATE_REACHABLE){ + if(local){ + // Have an entry, and it's non-local - reply (locally). + // Potential out-of-date cache problem. + // Should query remotely instead of replying. + varp_send(conn, VARP_OP_ANNOUNCE, ventry); + } + } else { + // Incomplete entry. Resolve. + VCEntry_resolve(ventry, msg, VCACHE_FLAG_REMOTE_PROBE); + } + } else { + // Non-local request (on one of our tcp connetions). + if(ventry->state == VCACHE_STATE_REACHABLE){ + if(local){ + // Have an entry and it's local - reply (remotely). + // Potential out-of-date cache problem. + // Should query locally instead of replying. + varp_send(msg->conn, VARP_OP_ANNOUNCE, ventry); + } else { + // Have a non-local entry - do nothing and assume someone else + // will reply. 
+ } + } else { + // Incomplete entry. Resolve. + VCEntry_resolve(ventry, msg, VCACHE_FLAG_LOCAL_PROBE); + } + } + exit: + dprintf("< err=%d\n", err); + return err; +} +#endif + +/** Handle a varp announce message. + * Update the matching ventry if we have one. + * + * @param msg incoming message + * @param varp message + * @return 0 if OK, -ENOENT if no matching entry + */ +int vcache_handle_announce(IPMessage *msg, VarpHdr *varph, int local){ + int err = 0; + + vcache_forward_varp(varph, local); + err = VarpCache_update(vcache, msg, varph, VCACHE_STATE_REACHABLE); + return err; +} + +/** Handle an incoming varp message. + * + * @param msg incoming message + * @return 0 if OK, error code otherwise + */ +int vcache_handle_message(IPMessage *msg, int local){ + int err = -EINVAL; + VnetMsg *vmsg = msg->data; + VarpHdr *varph = &vmsg->varp.varph; + + dprintf(">\n"); + if(1){ + dprintf("> src=%s:%d\n", inet_ntoa(msg->saddr.sin_addr), ntohs(msg->saddr.sin_port)); + dprintf("> dst=%s:%d\n", inet_ntoa(msg->daddr.sin_addr), ntohs(msg->daddr.sin_port)); + dprintf("> opcode=%d vnet=%u vmac=" MACFMT "\n", + ntohs(varph->opcode), ntohl(varph->vnet), MAC6TUPLE(varph->vmac.mac)); + } + switch(ntohs(varph->opcode)){ + case VARP_OP_REQUEST: + err = vcache_handle_request(msg, varph, local); + break; + case VARP_OP_ANNOUNCE: + err = vcache_handle_announce(msg, varph, local); + break; + default: + break; + } + dprintf("< err=%d\n", err); + return err; +} + +/** Initialize the varp cache. + * + * @return 0 on success, error code otherwise + */ +int vcache_init(void){ + int err = 0; + + if(!vcache){ + vcache = VarpCache_new(); + } + return err; +} diff --git a/tools/vnet/vnetd/vcache.h b/tools/vnet/vnetd/vcache.h new file mode 100644 index 0000000000..1b0f492f83 --- /dev/null +++ b/tools/vnet/vnetd/vcache.h @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2004 Mike Wray . 
+ * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of the + * License, or (at your option) any later version. This library is + * distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _VNET_VCACHE_H_ +#define _VNET_VCACHE_H_ + +#include "hash_table.h" + +/** Time-to-live of varp cache entries (in seconds).*/ +#define VCACHE_ENTRY_TTL 30.0 + +/** Maximum number of varp probes to make. */ +#define VCACHE_PROBE_MAX 5 + +/** Interval between varp probes (in seconds). */ +#define VCACHE_PROBE_INTERVAL 3.0 + +/** Delay before forwarding a local probe (in seconds). */ +#define VCACHE_LOCAL_DELAY 2.0 + +/** Number of buckets in the varp cache (must be prime). */ +#define VCACHE_BUCKETS 3001 + +enum { + VCACHE_STATE_INCOMPLETE = 1, + VCACHE_STATE_REACHABLE = 2, + VCACHE_STATE_FAILED = 3 +}; + +enum { + VCACHE_FLAG_PROBING = 1, + VCACHE_FLAG_PERMANENT = 2, + VCACHE_FLAG_LOCAL_PROBE = 4, + VCACHE_FLAG_REMOTE_PROBE = 8, +}; + + +#include +/* + * Display an IP address in readable format. 
+ */ + +#define NIPQUAD(addr) \ + ((unsigned char *)&addr)[0], \ + ((unsigned char *)&addr)[1], \ + ((unsigned char *)&addr)[2], \ + ((unsigned char *)&addr)[3] + +#if defined(__LITTLE_ENDIAN) +#define HIPQUAD(addr) \ + ((unsigned char *)&addr)[3], \ + ((unsigned char *)&addr)[2], \ + ((unsigned char *)&addr)[1], \ + ((unsigned char *)&addr)[0] +#elif defined(__BIG_ENDIAN) +#define HIPQUAD NIPQUAD +#else +#error "Please fix asm/byteorder.h" +#endif /* __LITTLE_ENDIAN */ + +#define IPFMT "%u.%u.%u.%u" +#define MACFMT "%02x:%02x:%02x:%02x:%02x:%02x" + +#define MAC6TUPLE(_mac) (_mac)[0], (_mac)[1], (_mac)[2], (_mac)[3], (_mac)[4], (_mac)[5] + +typedef struct IPMessage { + Conn *conn; + struct sockaddr_in saddr; + struct sockaddr_in daddr; + void *data; + struct IPMessage *next; +} IPMessage; + +typedef struct IPMessageQueue { + IPMessage *msg; + int len; + int maxlen; +} IPMessageQueue; + +/** Key for varp cache entries. */ +typedef struct VCKey { + /** Vnet id (network order). */ + uint32_t vnet; + /** Virtual MAC address. */ + Vmac vmac; +} VCKey; + +typedef struct VCEntry { + /** Key for the entry. */ + VCKey key; + + /** Care-of address for the key. */ + uint32_t addr; + + /** Alias coa if we are a gateway. */ + //uint32_t gateway; + /** Encapsulation to use (if a gateway). */ + //uint32_t encaps; + + /** Where this entry came from. */ + uint32_t source; + + /** Last-updated timestamp. */ + double timestamp; + + /** State. */ + short state; + + /** Flags. */ + short flags; + + /** Number of probes sent. */ + int probes; + + /** List of messages to reply to when completes. */ + IPMessageQueue queue; + +} VCEntry; + +/** The varp cache. Varp cache entries indexed by VCKey. */ +typedef struct VarpCache { + HashTable *table; +} VarpCache; + +int vcache_init(void); +int vcache_handle_message(IPMessage *msg, int local); + +#endif /* ! 
_VNET_VCACHE_H_ */ diff --git a/tools/vnet/vnetd/vnetd.c b/tools/vnet/vnetd/vnetd.c new file mode 100644 index 0000000000..5a37e160ef --- /dev/null +++ b/tools/vnet/vnetd/vnetd.c @@ -0,0 +1,1239 @@ +/* + * Copyright (C) 2004 Mike Wray . + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of the + * License, or (at your option) any later version. This library is + * distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/** @file + * + * Vnetd tcp messages: + * + * - varp request: request care-of-addr for a vif. + * If know answer, reply. If not broadcast locally. + * + * - varp announce: reply to a varp request. + * If a (local) request is pending, remember and broadcast locally. + * + * - vnet subscribe: indicate there are local vifs in a vnet (use varp announce?). + * + * - vnet forward: tunneled broadcast packet to rebroadcast. + * Broadcast locally (if there are vifs in the vnet). + * + * + * Vnetd udp messages (varp): + * + * - local varp request: + * If know and vif is non-local, reply. + * If know and vif is local, do nothing (but announce will reset). + * If have entry saying is local and no-one answers - remove (? or rely on entry timeout). + * If don't know and there is no (quick) local reply, forward to peers. + * + * - remote varp request: + * If know, reply. + * If don't know, query locally (and queue request). + * + * - varp announce: remember and adjust vnet subscriptions. 
+ * Forward to peers if a request is pending. + * + * Vnetd broadcast messages (tunneling): + * + * - etherip: forward to peers (on the right vnets) + * + * - esp: forward to peers (on the right vnets) + * + * + * For etherip can tell the vnet from the header (in clear). + * But for esp can't. So should use mcast to define? Or always some clear header? + * + * Make ssl on tcp connections optional. + * + * So far have been assuming esp for security. + * But could use vnetd to forward and use ssl on the connection. + * But has usual probs with efficiency. + * However, should 'just work' if the coa for the vif has been set + * to the vnetd. How? Vnetd configured to act as gateway for + * some peers? Then would rewrite varp announce to itself and forward + * traffic to peer. + * + * Simplify - make each vnetd have one peer? + * If need to link more subnets, add vnetds? + * + * Need requests table for each tcp conn (incoming). + * - entries we want to resolve (and fwd the answer). + * + * Need requests table for the udp socket. + * - entries we want to resolve (and return the answer). + * + * Need table of entries we know. + * - from caching local announce + * - from caching announce reply to forwarded request + * + * Problem with replying to requests from the cache - if the cache + * is out of date we reply with incorrect data. So if a VM migrates + * we will advertise the old location until it times out. + * + * So should probably not reply out of the cache at all - but always + * query for the answer. Could query direct to old location if + * entry is valid the first time, and broadcast if no reply in timeout. + * Causes delay if migrated - may as well broadcast. + * + * Need to watch out for query loops. If have 3 vnetds A,B,C and + * A gets a query, forwards to B and C. B forwards to C, which + * forwards to A, and on forever. So if have an entry that has been + * probed, do not forward it when get another query for it. 
+ * + * @author Mike Wray + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +//#include // For struct iphdr; +#include // For struct iphdr; + +#include +#include "if_etherip.h" +#include "if_varp.h" + +#include "allocate.h" + +#include "vnetd.h" +#include "file_stream.h" +#include "string_stream.h" +#include "socket_stream.h" +#include "sys_net.h" + +#include "enum.h" +#include "sxpr.h" + +#include "marshal.h" +#include "connection.h" +#include "select.h" +#include "timer.h" +#include "vcache.h" + +int create_socket(int socktype, uint32_t saddr, uint32_t port, int flags, Conn **val); + +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +/** Socket flags. */ +enum { + VSOCK_REUSE=1, + VSOCK_BIND=2, + VSOCK_CONNECT=4, + VSOCK_BROADCAST=8, + VSOCK_MULTICAST=16, + }; + +#define PROGRAM "vnetd" +#define VERSION "0.1" + +#define MODULE_NAME PROGRAM +#define DEBUG +#undef DEBUG +#include "debug.h" + +#define OPT_PORT 'p' +#define KEY_PORT "port" +#define DOC_PORT "\n\t" PROGRAM " UDP port (as a number or service name)" + +#define OPT_ADDR 'm' +#define KEY_ADDR "mcaddr" +#define DOC_ADDR "
\n\t" PROGRAM " multicast address" + +#define OPT_PEER 'r' +#define KEY_PEER "peer" +#define DOC_PEER "\n\t Peer " PROGRAM " to connect to (IP address or hostname)" + +#define OPT_FILE 'f' +#define KEY_FILE "file" +#define DOC_FILE "\n\t Configuration file to load" + +#define OPT_CTRL 'c' +#define KEY_CTRL "control" +#define DOC_CTRL "\n\t " PROGRAM " control port (as a number or service name)" + +#define OPT_HELP 'h' +#define KEY_HELP "help" +#define DOC_HELP "\n\tprint help" + +#define OPT_VERSION 'v' +#define KEY_VERSION "version" +#define DOC_VERSION "\n\tprint version" + +#define OPT_VERBOSE 'V' +#define KEY_VERBOSE "verbose" +#define DOC_VERBOSE "\n\tverbose flag" + +/** Print a usage message. + * Prints to stdout if err is zero, and exits with 0. + * Prints to stderr if err is non-zero, and exits with 1. + * + * @param err error code + */ +static void usage(int err){ + FILE *out = (err ? stderr : stdout); + + fprintf(out, "Usage: %s [options]\n", PROGRAM); + fprintf(out, "-%c, --%s %s\n", OPT_ADDR, KEY_ADDR, DOC_ADDR); + fprintf(out, "-%c, --%s %s\n", OPT_PORT, KEY_PORT, DOC_PORT); + fprintf(out, "-%c, --%s %s\n", OPT_PEER, KEY_PEER, DOC_PEER); + fprintf(out, "-%c, --%s %s\n", OPT_VERBOSE, KEY_VERBOSE, DOC_VERBOSE); + fprintf(out, "-%c, --%s %s\n", OPT_VERSION, KEY_VERSION, DOC_VERSION); + fprintf(out, "-%c, --%s %s\n", OPT_HELP, KEY_HELP, DOC_HELP); + exit(err ? 1 : 0); +} + +/** Short options. Options followed by ':' take an argument. */ +static char *short_opts = (char[]){ + OPT_ADDR, ':', + OPT_PORT, ':', + OPT_PEER, ':', + OPT_HELP, + OPT_VERSION, + OPT_VERBOSE, + 0 }; + +/** Long options. 
*/ +static struct option const long_opts[] = { + { KEY_ADDR, required_argument, NULL, OPT_ADDR }, + { KEY_PORT, required_argument, NULL, OPT_PORT }, + { KEY_PEER, required_argument, NULL, OPT_PEER }, + { KEY_HELP, no_argument, NULL, OPT_HELP }, + { KEY_VERSION, no_argument, NULL, OPT_VERSION }, + { KEY_VERBOSE, no_argument, NULL, OPT_VERBOSE }, + { NULL, 0, NULL, 0 } +}; + +/** Get address of vnetd. So we can ignore broadcast traffic + * we sent ourselves. + * + * @param addr + * @return 0 on success, error code otherwise + */ +int get_self_addr(struct sockaddr_in *addr){ + int err = 0; + char hostname[1024] = {}; + unsigned long saddr; + + //dprintf(">\n"); + err = gethostname(hostname, sizeof(hostname) -1); + if(err) goto exit; + err = get_host_address(hostname, &saddr); + if(err == 0){ err = -ENOENT; goto exit; } + err = 0; + addr->sin_addr.s_addr = saddr; + exit: + //dprintf("< err=%d\n", err); + return err; +} + +/** Marshal a message. + * + * @param io destination + * @param msg message + * @return number of bytes written, or negative error code + */ +int VnetMsg_marshal(IOStream *io, VnetMsg *msg){ + int err = 0; + int hdr_n = sizeof(VnetMsgHdr); + + err = marshal_uint16(io, msg->hdr.id); + if(err < 0) goto exit; + err = marshal_uint16(io, msg->hdr.opcode); + if(err < 0) goto exit; + switch(msg->hdr.id){ + case VNET_VARP_ID: + err = marshal_bytes(io, ((char*)msg) + hdr_n, sizeof(VarpHdr) - hdr_n); + break; + case VNET_FWD_ID: + err = marshal_uint16(io, msg->fwd.protocol); + if(err < 0) goto exit; + err = marshal_uint16(io, msg->fwd.len); + if(err < 0) goto exit; + err = marshal_bytes(io, msg->fwd.data, msg->fwd.len); + break; + default: + err = -EINVAL; + break; + } + exit: + return err; +} + +/** Unmarshal a message. 
+ * + * @param io source + * @param msg message to unmarshal into + * @return number of bytes read, or negative error code + */ +int VnetMsg_unmarshal(IOStream *io, VnetMsg *msg){ + int err = 0; + int hdr_n = sizeof(VnetMsgHdr); + + dprintf("> id\n"); + err = unmarshal_uint16(io, &msg->hdr.id); + if(err < 0) goto exit; + dprintf("> opcode\n"); + err = unmarshal_uint16(io, &msg->hdr.opcode); + if(err < 0) goto exit; + switch(msg->hdr.id){ + case VNET_VARP_ID: + msg->hdr.opcode = htons(msg->hdr.opcode); + dprintf("> varp hdr_n=%d varphdr=%d\n", hdr_n, sizeof(VarpHdr)); + err = unmarshal_bytes(io, ((char*)msg) + hdr_n, sizeof(VarpHdr) - hdr_n); + break; + case VNET_FWD_ID: + dprintf("> forward\n"); + err = unmarshal_uint16(io, &msg->fwd.protocol); + if(err < 0) goto exit; + dprintf("> forward len\n"); + err = unmarshal_uint16(io, &msg->fwd.len); + if(err < 0) goto exit; + dprintf("> forward bytes\n"); + err = unmarshal_bytes(io, msg->fwd.data, msg->fwd.len); + break; + default: + wprintf("> Invalid id %d\n", msg->hdr.id); + err = -EINVAL; + break; + } + exit: + dprintf("< err=%d \n", err); + return err; +} + +Vnetd _vnetd = {}; +Vnetd *vnetd = &_vnetd; + +/** Counter for timer alarms. + */ +static unsigned timer_alarms = 0; + +/** Set vnetd defaults. + * + * @param vnetd vnetd + */ +void vnetd_set_defaults(Vnetd *vnetd){ + *vnetd = (Vnetd){}; + vnetd->port = htons(VNETD_PORT); + vnetd->peer_port = vnetd->port; //htons(VNETD_PEER_PORT); + vnetd->verbose = FALSE; + vnetd->peers = ONULL; + vnetd->mcast_addr.sin_addr.s_addr = VARP_MCAST_ADDR; + vnetd->mcast_addr.sin_port = vnetd->port; +} + +uint32_t vnetd_mcast_addr(Vnetd *vnetd){ + return vnetd->mcast_addr.sin_addr.s_addr; +} + +uint16_t vnetd_mcast_port(Vnetd *vnetd){ + return vnetd->mcast_addr.sin_port; +} + +/** Add a connection to a peer. 
+ * + * @param vnetd vnetd + * @param conn connection + */ +void connections_add(Vnetd *vnetd, Conn *conn){ + vnetd->connections = ConnList_add(conn, vnetd->connections); +} + +/** Delete a connection to a peer. + * + * @param vnetd vnetd + * @param conn connection + */ +void connections_del(Vnetd *vnetd, Conn *conn){ + ConnList *prev, *curr, *next; + for(prev = NULL, curr = vnetd->connections; curr; prev = curr, curr = next){ + next = curr->next; + if(curr->conn == conn){ + if(prev){ + prev->next = curr->next; + } else { + vnetd->connections = curr->next; + } + } + } +} + +/** Close all connections to peers. + * + * @param vnetd vnetd + */ +void connections_close_all(Vnetd *vnetd){ + ConnList *l; + for(l = vnetd->connections; l; l = l->next){ + Conn_close(l->conn); + } + vnetd->connections = NULL; +} + +/** Add peer connections to a select set. + * + * @param vnetd vnetd + * @param set select set + */ +void connections_select(Vnetd *vnetd, SelectSet *set){ + ConnList *l; + for(l = vnetd->connections; l; l = l->next){ + SelectSet_add_read(set, l->conn->sock); + } +} + +/** Handle peer connections according to a select set. + * + * @param vnetd vnetd + * @param set indicates ready connections + */ +void connections_handle(Vnetd *vnetd, SelectSet *set){ + ConnList *prev, *curr, *next; + Conn *conn; + for(prev = NULL, curr = vnetd->connections; curr; prev = curr, curr = next){ + next = curr->next; + conn = curr->conn; + if(FD_ISSET(conn->sock, &set->rd)){ + int conn_err; + conn_err = Conn_handle(conn); + if(conn_err){ + if(prev){ + prev->next = curr->next; + } else { + vnetd->connections = curr->next; + } + } + } + } +} + +/** Forward a message from a peer onto the local subnet. 
+ * + * @param vnetd vnetd + * @param vmsg message + * @return 0 on success, error code otherwise + */ +int vnetd_forward_local(Vnetd *vnetd, VnetMsg *vmsg){ + int err = 0; + int sock = 0; + struct sockaddr_in addr_in; + struct sockaddr *addr = (struct sockaddr *)&addr_in; + socklen_t addr_n = sizeof(addr_in); + + dprintf(">\n"); + switch(vmsg->fwd.protocol){ + case IPPROTO_ESP: + dprintf("> ESP\n"); + sock = vnetd->esp_sock; break; + case IPPROTO_ETHERIP: + dprintf("> Etherip\n"); + sock = vnetd->etherip_sock; break; + default: + err = -EINVAL; + goto exit; + } + addr_in.sin_family = AF_INET; + addr_in.sin_addr = vnetd->mcast_addr.sin_addr; + addr_in.sin_port = htons(vmsg->fwd.protocol); + dprintf("> send dst=%s protocol=%d len=%d\n", + inet_ntoa(addr_in.sin_addr), vmsg->fwd.protocol, vmsg->fwd.len); + err = sendto(sock, vmsg->fwd.data, vmsg->fwd.len, 0, addr, addr_n); + exit: + dprintf("< err=%d\n", err); + return err; +} + +/** Forward a message to a peer. + * + * @param conn peer connection + * @param protocol message protocol + * @param data message data + * @param data_n message size + * @return 0 on success, error code otherwise + */ +int vnetd_forward_peer(Conn *conn, int protocol, void *data, int data_n){ + int err = 0; + IOStream _io, *io = &_io; + StringData sdata; + char buf[1600]; + + dprintf("> addr=%s protocol=%d n=%d\n", + inet_ntoa(conn->addr.sin_addr), protocol, data_n); + string_stream_init(io, &sdata, buf, sizeof(buf)); + dprintf("> 10\n"); + err = marshal_uint16(io, VNET_FWD_ID); + if(err < 0) goto exit; + dprintf("> 20\n"); + err = marshal_uint16(io, 0); + if(err < 0) goto exit; + dprintf("> 30\n"); + err = marshal_uint16(io, protocol); + if(err < 0) goto exit; + dprintf("> 40\n"); + err = marshal_uint16(io, data_n); + if(err < 0) goto exit; + dprintf("> 50\n"); + err = marshal_bytes(io, data, data_n); + if(err < 0) goto exit; + dprintf("> 60 bytes=%d\n", IOStream_get_written(io)); + err = IOStream_write(conn->out, buf, 
IOStream_get_written(io)); + IOStream_flush(conn->out); + exit: + if(err < 0) perror(__FUNCTION__); + dprintf("< err=%d\n", err); + return err; +} + +/** Forward a message to all peers. + * + * @param vnetd vnetd + * @param protocol message protocol + * @param data message data + * @param data_n message size + * @return 0 on success, error code otherwise + */ +int vnetd_forward_peers(Vnetd *vnetd, int protocol, void *data, int data_n){ + int err = 0; + ConnList *curr, *next; + + dprintf(">\n"); + for(curr = vnetd->connections; curr; curr = next){ + next = curr->next; + vnetd_forward_peer(curr->conn, protocol, data, data_n); + } + dprintf("< err=%d\n", err); + return err; +} + +/** Handler for a peer connection. + * Reads a VnetMsg from the connection and handles it. + * + * @param conn peer connection + * @return 0 on success, error code otherwise + */ +int conn_handle_fn(Conn *conn){ + int err = 0; + VnetMsg *vmsg = ALLOCATE(VnetMsg); + IPMessage *msg = NULL; + + dprintf("> addr=%s port=%u\n", + inet_ntoa(conn->addr.sin_addr), + ntohs(conn->addr.sin_port)); + err = VnetMsg_unmarshal(conn->in, vmsg); + if(err < 0){ + wprintf("> Unmarshal error %d\n", err); + goto exit; + } + switch(vmsg->hdr.id){ + case VNET_VARP_ID: + dprintf("> Got varp message\n"); + msg = ALLOCATE(IPMessage); + msg->conn = conn; + msg->saddr = conn->addr; + msg->data = vmsg; + err = vcache_handle_message(msg, 0); + err = 0; + break; + case VNET_FWD_ID: + dprintf("> Got forward message\n"); + err = vnetd_forward_local(vnetd, vmsg); + err = 0; + break; + default: + wprintf("> Invalid id=%d\n", vmsg->hdr.id); + err = -EINVAL; + break; + } + exit: + dprintf("< err=%d\n", err); + return err; +} + +/** Accept an incoming tcp connection from a peer vnetd. 
+ * + * @param sock tcp socket + * @return 0 on success, error code otherwise + */ +int vnetd_accept(Vnetd *vnetd, Conn *conn){ + Conn *new_conn = NULL; + struct sockaddr_in peer_in; + struct sockaddr *peer = (struct sockaddr *)&peer_in; + socklen_t peer_n = sizeof(peer_in); + int peersock; + int err = 0; + + //dprintf(">\n"); + new_conn = Conn_new(conn_handle_fn, vnetd); + //dprintf("> accept...\n"); + peersock = accept(conn->sock, peer, &peer_n); + //dprintf("> accept=%d\n", peersock); + if(peersock < 0){ + perror("accept"); + err = -errno; + goto exit; + } + iprintf("> Accepted connection from %s:%d\n", + inet_ntoa(peer_in.sin_addr), htons(peer_in.sin_port)); + err = Conn_init(new_conn, peersock, SOCK_STREAM, peer_in); + if(err) goto exit; + connections_add(vnetd, new_conn); + exit: + if(err){ + Conn_close(new_conn); + } + if(err < 0) wprintf("< err=%d\n", err); + return err; +} + +/** Connect to a peer vnetd. + * + * @param vnetd vnetd + * @param addr address + * @param port port + * @return 0 on success, error code otherwise + */ +int vnetd_connect(Vnetd *vnetd, struct in_addr addr, uint16_t port){ + Conn *conn = NULL; + int err = 0; + + //dprintf(">\n"); + conn = Conn_new(conn_handle_fn, vnetd); + err = Conn_connect(conn, SOCK_STREAM, addr, port); + if(err) goto exit; + connections_add(vnetd, conn); + exit: + if(err){ + Conn_close(conn); + } + //dprintf(" < err=%d\n", err); + return err; +} + +/** Handle a message on the udp socket. + * Expecting to see VARP messages only. 
+ * + * @param sock udp socket + * @return 0 on success, error code otherwise + */ +int vnetd_handle_udp(Vnetd *vnetd, Conn *conn){ + int err = 0, rcv = 0; + struct sockaddr_in self_in; + struct sockaddr_in peer_in; + struct sockaddr *peer = (struct sockaddr *)&peer_in; + socklen_t peer_n = sizeof(peer_in); + VnetMsg *vmsg = NULL; + void *data; + int data_n; + int flags = 0; + IPMessage *msg = NULL; + + //dprintf(">\n"); + self_in = vnetd->addr; + vmsg = ALLOCATE(VnetMsg); + data = &vmsg->varp.varph; + data_n = sizeof(VarpHdr); + rcv = recvfrom(conn->sock, data, data_n, flags, peer, &peer_n); + if(rcv < 0){ + err = rcv; + goto exit; + } + dprintf("> Received %d bytes from %s:%d\n", + rcv, inet_ntoa(peer_in.sin_addr), htons(peer_in.sin_port)); + if(rcv != data_n){ + err = -EINVAL; + goto exit; + } + if(peer_in.sin_addr.s_addr == self_in.sin_addr.s_addr){ + //dprintf("> Ignoring message from self.\n"); + goto exit; + } + msg = ALLOCATE(IPMessage); + msg->conn = conn; + msg->saddr = peer_in; + msg->data = vmsg; + + err = vcache_handle_message(msg, 1); + exit: + //dprintf("< err=%d\n", err); + return err; +} + +/** Handle a message on a raw socket. + * Only deals with etherip and esp. + * Forwards messages to peers. 
+ * + * @param vnetd vnetd + * @param sock socket + * @param protocol protocol + * @return 0 on success, error code otherwise + */ +int vnetd_handle_protocol(Vnetd *vnetd, int sock, int protocol){ + int err = 0, rcv = 0; + struct sockaddr_in self_in; + struct sockaddr_in peer_in; + struct sockaddr *peer = (struct sockaddr *)&peer_in; + socklen_t peer_n = sizeof(peer_in); + uint8_t buf[VNET_FWD_MAX]; + int buf_n = sizeof(buf); + char *data, *end; + int flags = 0; + struct iphdr *iph = NULL; + + //dprintf(">\n"); + self_in = vnetd->addr; + rcv = recvfrom(sock, buf, buf_n, flags, peer, &peer_n); + if(rcv < 0){ + err = rcv; + goto exit; + } + dprintf("> Received %d bytes from %s protocol=%d\n", + rcv, inet_ntoa(peer_in.sin_addr), protocol); + if(rcv < sizeof(struct iphdr)){ + wprintf("> Message too short for IP header\n"); + err = -EINVAL; + goto exit; + } + if(peer_in.sin_addr.s_addr == self_in.sin_addr.s_addr){ + dprintf("> Ignoring message from self.\n"); + goto exit; + } + data = buf; + end = buf + rcv; + iph = (void*)data; + data += (iph->ihl << 2); + vnetd_forward_peers(vnetd, protocol, data, end - data); + exit: + //dprintf("< err=%d\n", err); + return err; +} + +/** Socket select loop. + * Accepts connections on the tcp socket and handles + * messages on the other sockets. 
+ * + * @return 0 on success, error code otherwise + */ +int vnetd_select(Vnetd *vnetd){ + int err = 0; + SelectSet set = {}; + while(1){ + SelectSet_zero(&set); + SelectSet_add_read(&set, vnetd->udp_conn->sock); + SelectSet_add_read(&set, vnetd->bcast_conn->sock); + SelectSet_add_read(&set, vnetd->etherip_sock); + SelectSet_add_read(&set, vnetd->esp_sock); + SelectSet_add_read(&set, vnetd->listen_conn->sock); + connections_select(vnetd, &set); + err = SelectSet_select(&set, NULL); + if(err == 0) continue; + if(err < 0){ + if(errno == EINTR){ + if(timer_alarms){ + timer_alarms = 0; + process_timers(); + } + continue; + } + perror("select"); + goto exit; + } + if(FD_ISSET(vnetd->udp_conn->sock, &set.rd)){ + vnetd_handle_udp(vnetd, vnetd->udp_conn); + } + if(FD_ISSET(vnetd->bcast_conn->sock, &set.rd)){ + vnetd_handle_udp(vnetd, vnetd->bcast_conn); + } + if(FD_ISSET(vnetd->etherip_sock, &set.rd)){ + vnetd_handle_protocol(vnetd, vnetd->etherip_sock, IPPROTO_ETHERIP); + } + if(FD_ISSET(vnetd->esp_sock, &set.rd)){ + vnetd_handle_protocol(vnetd, vnetd->esp_sock, IPPROTO_ESP); + } + connections_handle(vnetd, &set); + if(FD_ISSET(vnetd->listen_conn->sock, &set.rd)){ + vnetd_accept(vnetd, vnetd->listen_conn); + } + } + exit: + return err; +} + +/** Set socket option to reuse address. + */ +int setsock_reuse(int sock, int reuse){ + int err = 0; + err = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)); + if(err < 0){ + err = -errno; + perror("setsockopt SO_REUSEADDR"); + } + return err; +} + +/** Set socket broadcast option. + */ +int setsock_broadcast(int sock, int bcast){ + int err = 0; + err = setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &bcast, sizeof(bcast)); + if(err < 0){ + err = -errno; + perror("setsockopt SO_BROADCAST"); + } + return err; +} + +/** Join a socket to a multicast group. + */ +int setsock_multicast(int sock, uint32_t saddr){ + int err = 0; + struct ip_mreqn mreq = {}; + int mloop = 0; + // See 'man 7 ip' for these options. 
+ mreq.imr_multiaddr.s_addr = saddr; // IP multicast address. + mreq.imr_address = vnetd->addr.sin_addr; // Interface IP address. + mreq.imr_ifindex = 0; // Interface index (0 means any). + err = setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &mloop, sizeof(mloop)); + if(err < 0){ + err = -errno; + perror("setsockopt IP_MULTICAST_LOOP"); + goto exit; + } + err = setsockopt(sock, SOL_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); + if(err < 0){ + err = -errno; + perror("setsockopt IP_ADD_MEMBERSHIP"); + goto exit; + } + exit: + return err; +} + +/** Set a socket's multicast ttl (default is 1). + */ +int setsock_multicast_ttl(int sock, uint8_t ttl){ + int err = 0; + err = setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); + if(err < 0){ + err = -errno; + perror("setsockopt IP_MULTICAST_TTL"); + } + return err; +} + + +char * socket_flags(int flags){ + static char s[6]; + int i = 0; + s[i++] = (flags & VSOCK_CONNECT ? 'c' : '-'); + s[i++] = (flags & VSOCK_BIND ? 'b' : '-'); + s[i++] = (flags & VSOCK_REUSE ? 'r' : '-'); + s[i++] = (flags & VSOCK_BROADCAST ? 'B' : '-'); + s[i++] = (flags & VSOCK_MULTICAST ? 'M' : '-'); + s[i++] = '\0'; + return s; +} + +/** Create a socket. + * The flags can include VSOCK_REUSE, VSOCK_BROADCAST, VSOCK_CONNECT. 
+ * + * @param socktype socket type + * @param saddr address + * @param port port + * @param flags flags + * @param val return value for the socket connection + * @return 0 on success, error code otherwise + */ +int create_socket(int socktype, uint32_t saddr, uint32_t port, int flags, Conn **val){ + int err = 0; + int sock = 0; + struct sockaddr_in addr_in; + struct sockaddr *addr = (struct sockaddr *)&addr_in; + socklen_t addr_n = sizeof(addr_in); + Conn *conn = NULL; + int reuse, bcast; + + //dprintf(">\n"); + reuse = (flags & VSOCK_REUSE); + bcast = (flags & VSOCK_BROADCAST); + addr_in.sin_family = AF_INET; + addr_in.sin_addr.s_addr = saddr; + addr_in.sin_port = port; + dprintf("> flags=%s addr=%s port=%d\n", socket_flags(flags), + inet_ntoa(addr_in.sin_addr), ntohs(addr_in.sin_port)); + + sock = socket(AF_INET, socktype, 0); + if(sock < 0){ + err = -errno; + goto exit; + } + if(reuse){ + err = setsock_reuse(sock, reuse); + if(err < 0) goto exit; + } + if(bcast){ + err = setsock_broadcast(sock, bcast); + if(err < 0) goto exit; + } + if(flags & VSOCK_MULTICAST){ + err = setsock_multicast(sock, saddr); + if(err < 0) goto exit; + } + if(flags & VSOCK_CONNECT){ + err = connect(sock, addr, addr_n); + if(err < 0){ + err = -errno; + perror("connect"); + goto exit; + } + } + if(flags & VSOCK_BIND){ + err = bind(sock, addr, addr_n); + if(err < 0){ + err = -errno; + perror("bind"); + goto exit; + } + } + conn = Conn_new(NULL, NULL); + Conn_init(conn, sock, socktype, addr_in); + { + struct sockaddr_in self = {}; + socklen_t self_n; + getsockname(conn->sock, (struct sockaddr *)&self, &self_n); + dprintf("> sockname sock=%d addr=%s port=%d\n", + conn->sock, inet_ntoa(self.sin_addr), ntohs(self.sin_port)); + } + exit: + *val = (err ? NULL : conn); + //dprintf("< err=%d\n", err); + return err; +} + +/** Create the tcp listen socket. 
+ * + * @param vnetd program arguments + * @param val return value for the socket + * @return 0 on success, error code otherwise + */ +int vnetd_listen_conn(Vnetd *vnetd, Conn **val){ + int err = 0; + int flags = VSOCK_BIND | VSOCK_REUSE; + //dprintf(">\n"); + err = create_socket(SOCK_STREAM, INADDR_ANY, vnetd->peer_port, flags, val); + if(err) goto exit; + err = listen((*val)->sock, 5); + if(err < 0){ + err = -errno; + perror("listen"); + goto exit; + } + exit: + if(err && *val){ + Conn_close(*val); + *val = NULL; + } + //dprintf("< err=%d\n", err); + return err; +} + +/** Create the udp socket. + * + * @param vnetd program arguments + * @param val return value for the socket + * @return 0 on success, error code otherwise + */ +int vnetd_udp_conn(Vnetd *vnetd, Conn **val){ + int err = 0; + uint32_t addr = INADDR_ANY; + uint16_t port = vnetd->port; + int flags = VSOCK_BIND | VSOCK_REUSE; + err = create_socket(SOCK_DGRAM, addr, port, flags, val); + return err; +} + +/** Create the broadcast socket. 
+ * + * @param vnetd program arguments + * @param val return value for the socket + * @return 0 on success, error code otherwise + */ +int vnetd_broadcast_conn(Vnetd *vnetd, Conn **val){ + int err = 0; + uint32_t addr = vnetd_mcast_addr(vnetd); + uint16_t port = vnetd_mcast_port(vnetd); + int flags = VSOCK_REUSE; + int multicast = IN_MULTICAST(ntohl(addr)); + + flags |= VSOCK_MULTICAST; + flags |= VSOCK_BROADCAST; + + err = create_socket(SOCK_DGRAM, addr, port, flags, val); + if(err < 0) goto exit; + if(multicast){ + err = setsock_multicast_ttl((*val)->sock, 1); + if(err < 0) goto exit; + } + if(0){ + struct sockaddr * addr = (struct sockaddr *)&vnetd->addr; + socklen_t addr_n = sizeof(vnetd->addr); + dprintf("> sock=%d bind addr=%s:%d\n", + (*val)->sock, inet_ntoa(vnetd->addr.sin_addr), ntohs(vnetd->addr.sin_port)); + err = bind((*val)->sock, addr, addr_n); + if(err < 0){ + err = -errno; + perror("bind"); + goto exit; + } + } + if(0){ + struct sockaddr_in self = {}; + socklen_t self_n; + getsockname((*val)->sock, (struct sockaddr *)&self, &self_n); + dprintf("> sockname sock=%d addr=%s port=%d\n", + (*val)->sock, inet_ntoa(self.sin_addr), ntohs(self.sin_port)); + } + exit: + return err; +} + +/** Type for signal handling functions. */ +typedef void SignalAction(int code, siginfo_t *info, void *data); + +/** Handle SIGCHLD by getting child exit status. + * This prevents child processes being defunct. + * + * @param code signal code + * @param info signal info + * @param data + */ +static void sigaction_SIGCHLD(int code, siginfo_t *info, void *data){ + int status; + pid_t pid; + pid = wait(&status); + dprintf("> child pid=%d status=%d\n", pid, status); +} + +/** Handle SIGPIPE. + * + * @param code signal code + * @param info signal info + * @param data + */ +static void sigaction_SIGPIPE(int code, siginfo_t *info, void *data){ + dprintf("> SIGPIPE\n"); +} + +/** Handle SIGALRM. 
+ * + * @param code signal code + * @param info signal info + * @param data + */ +static void sigaction_SIGALRM(int code, siginfo_t *info, void *data){ + //dprintf("> SIGALRM\n"); + timer_alarms++; +} + +/** Install a handler for a signal. + * + * @param signum signal + * @param action handler + * @return 0 on success, error code otherwise + */ +static int catch_signal(int signum, SignalAction *action){ + int err = 0; + struct sigaction sig = {}; + sig.sa_sigaction = action; + sig.sa_flags = SA_SIGINFO; + err = sigaction(signum, &sig, NULL); + if(err){ + perror("sigaction"); + } + return err; +} + +/** Create a raw socket. + * + * @param protocol protocol + * @param flags flags + * @param sock return value for the socket + */ +int vnetd_raw_socket(int protocol, int flags, uint32_t mcaddr, int *sock){ + int err; + int bcast = (flags & VSOCK_BROADCAST); + //dprintf("> protocol=%d\n", protocol); + err = *sock = socket(AF_INET, SOCK_RAW, protocol); + if(err < 0){ + err = -errno; + perror("socket"); + goto exit; + } + if(bcast){ + err = setsock_broadcast(*sock, bcast); + if(err < 0) goto exit; + } + if(flags & VSOCK_MULTICAST){ + err = setsock_multicast(*sock, mcaddr); + if(err < 0) goto exit; + } + exit: + //dprintf("< err=%d\n", err); + return err; +} + +/** Connect to peer vnetds. + * + * @param vnetd vnetd + * @return 0 on success, error code otherwise + */ +int vnetd_peers(Vnetd *vnetd){ + int err =0; + Sxpr x, l; + struct in_addr addr = {}; + for(l = vnetd->peers; CONSP(l); l = CDR(l)){ + x = CAR(l); + addr.s_addr = OBJ_INT(x); + vnetd_connect(vnetd, addr, vnetd->peer_port); + } + return err; +} + +/** Vnet daemon main program. 
+ * + * @param vnetd program arguments + * @return 0 on success, error code otherwise + */ +int vnetd_main(Vnetd *vnetd){ + int err = 0; + + //dprintf(">\n"); + err = get_self_addr(&vnetd->addr); + vnetd->addr.sin_port = vnetd->port; + iprintf("> VNETD\n"); + iprintf("> addr=%s port=%u\n", + inet_ntoa(vnetd->addr.sin_addr), htons(vnetd->port)); + iprintf("> mcaddr=%s port=%u\n", + inet_ntoa(vnetd->mcast_addr.sin_addr), htons(vnetd->port)); + iprintf("> peers port=%u ", htons(vnetd->peer_port)); + objprint(iostdout, vnetd->peers, 0); printf("\n"); + + err = vcache_init(); + err = vnetd_peers(vnetd); + + catch_signal(SIGCHLD,sigaction_SIGCHLD); + catch_signal(SIGPIPE,sigaction_SIGPIPE); + catch_signal(SIGALRM,sigaction_SIGALRM); + err = vnetd_listen_conn(vnetd, &vnetd->listen_conn); + if(err < 0) goto exit; + err = vnetd_udp_conn(vnetd, &vnetd->udp_conn); + if(err < 0) goto exit; + err = vnetd_broadcast_conn(vnetd, &vnetd->bcast_conn); + if(err < 0) goto exit; + { + int flags = VSOCK_BROADCAST | VSOCK_MULTICAST; + uint32_t mcaddr = vnetd->mcast_addr.sin_addr.s_addr; + + err = vnetd_raw_socket(IPPROTO_ETHERIP, flags, mcaddr, &vnetd->etherip_sock); + if(err < 0) goto exit; + err = vnetd_raw_socket(IPPROTO_ESP, flags, mcaddr, &vnetd->esp_sock); + if(err < 0) goto exit; + } + err = vnetd_select(vnetd); + exit: + Conn_close(vnetd->listen_conn); + Conn_close(vnetd->udp_conn); + Conn_close(vnetd->bcast_conn); + connections_close_all(vnetd); + close(vnetd->etherip_sock); + close(vnetd->esp_sock); + //dprintf("< err=%d\n", err); + return err; +} + +/** Parse command-line arguments and call the vnetd main program. 
+ * + * @param arg argument count + * @param argv arguments + * @return 0 on success, 1 otherwise + */ +extern int main(int argc, char *argv[]){ + int err = 0; + int key = 0; + int long_index = 0; + + vnetd_set_defaults(vnetd); + while(1){ + key = getopt_long(argc, argv, short_opts, long_opts, &long_index); + if(key == -1) break; + switch(key){ + case OPT_ADDR:{ + unsigned long addr; + err = get_host_address(optarg, &addr); + if(err) goto exit; + vnetd->mcast_addr.sin_addr.s_addr = addr; + break; } + case OPT_PORT: + err = convert_service_to_port(optarg, &vnetd->port); + if(err) goto exit; + break; + case OPT_PEER:{ + unsigned long addr; + err = get_host_address(optarg, &addr); + if(err) goto exit; + //cons_push(&vnetd->peers, mkaddress(addr)); + cons_push(&vnetd->peers, mkint(addr)); + break; } + case OPT_HELP: + usage(0); + break; + case OPT_VERBOSE: + vnetd->verbose = TRUE; + break; + case OPT_VERSION: + iprintf("> %s %s\n", PROGRAM, VERSION); + exit(0); + break; + default: + usage(EINVAL); + break; + } + } + err = vnetd_main(vnetd); + exit: + if(err && key > 0){ + eprintf("> Error in arg %c\n", key); + } + return (err ? 1 : 0); +} diff --git a/tools/vnet/vnetd/vnetd.h b/tools/vnet/vnetd/vnetd.h new file mode 100644 index 0000000000..757b003b50 --- /dev/null +++ b/tools/vnet/vnetd/vnetd.h @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2004 Mike Wray . + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of the + * License, or (at your option) any later version. This library is + * distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU Lesser General Public License for more details. 
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#ifndef _VNET_VNETD_H_
#define _VNET_VNETD_H_

/* NOTE(review): the header names of the next two includes are missing from
 * this copy of the file; restore them from the original source. */
#include
#include
#include "if_varp.h"

#include "connection.h"
#include "sxpr.h"

/** Vnetd udp port in host order. */
#define VNETD_PORT VARP_PORT

/** Vnetd peer port in host order. */
#define VNETD_PEER_PORT (VARP_PORT + 1)

/** Varp message: carries a varp header. */
typedef struct VnetMsgVarp {
    VarpHdr varph;
} VnetMsgVarp;

/** Maximum number of payload bytes in a forward message. */
#define VNET_FWD_MAX (1500 + 200)

/** Forward message: a payload relayed between vnetds, tagged with the
 * IP protocol it belongs to.
 */
typedef struct VnetMsgFwd {
    /* NOTE(review): unnamed member. This only adds the header fields to the
     * struct when the compiler accepts unnamed fields of a typedef'd struct
     * type (e.g. gcc -fms-extensions); confirm the build flags. */
    VnetMsgHdr;
    /* Protocol of the payload (the code handles IPPROTO_ESP and
     * IPPROTO_ETHERIP). */
    uint16_t protocol;
    /* Number of valid bytes in data. */
    uint16_t len;
    uint8_t data[VNET_FWD_MAX];
} __attribute__((packed)) VnetMsgFwd;

/** Any message exchanged with a peer; hdr.id tells which member applies. */
typedef union VnetMsg {
    VnetMsgHdr hdr;
    VnetMsgVarp varp;
    VnetMsgFwd fwd;
} VnetMsg;

/** Message ids. */
enum {
    VNET_VARP_ID = VARP_ID,
    VNET_FWD_ID = 200,
};

/** Daemon state and configuration. */
typedef struct Vnetd {
    /* Udp port. The daemon stores it in sin_port unchanged, i.e. it is kept
     * in network byte order. */
    unsigned long port;
    /* Tcp port used between peer vnetds; passed on unchanged as a socket
     * port as well. */
    unsigned long peer_port;
    /* Set by the verbose command-line option. */
    int verbose;

    /* Raw sockets for ESP and etherip traffic. */
    int esp_sock;
    int etherip_sock;

    /* Our own address, used to recognise our own traffic. */
    struct sockaddr_in addr;
    /* Multicast address used on the local subnet. */
    struct sockaddr_in mcast_addr;

    /* List of peer addresses to connect to, stored as integers. */
    Sxpr peers;

    /* Tcp socket accepting connections from peers. */
    Conn *listen_conn;
    /* Udp socket bound to the vnetd port. */
    Conn *udp_conn;
    /* Udp socket created for the multicast address. */
    Conn *bcast_conn;

    /* Established connections to peers. */
    ConnList *connections;

} Vnetd;

/** The daemon instance shared by the program. */
extern Vnetd *vnetd;

#endif /* ! _VNET_VNETD_H_ */
--
2.30.2